PyPI - fraudcrawler - Versions diffs - 0.5.7__tar.gz → 0.5.8__tar.gz - Mend

fraudcrawler 0.5.7.tar.gz → 0.5.8.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of fraudcrawler might be problematic. Click here for more details.

Files changed (21) hide show

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: fraudcrawler
-Version: 0.5.7
+Version: 0.5.8
 Summary: Intelligent Market Monitoring
 Home-page: https://github.com/open-veanu/fraudcrawler
 License: MIT

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/fraudcrawler/base/base.py RENAMED Viewed

@@ -247,31 +247,32 @@ class DomainUtils:
     async def _unblock_url(self, url: str, zyte_api: "ZyteAPI") -> bytes | None:
         """Retry fetching a URL through the Zyte proxy after direct access failed.

         Intended as a fallback for 403 Forbidden responses from domains that
         may reject requests coming from certain IP ranges (e.g. cloud providers).

         Args:
             url: The URL to fetch using Zyte proxy mode.
             zyte_api: The ZyteAPI instance whose ``details`` coroutine is awaited.

         Returns:
             The base64-decoded ``httpResponseBody`` as bytes on success, or
             None when the response is empty, lacks that key, or any exception
             is raised along the way.
         """
         try:
             logger.info(f"Attempting to unblock URL using Zyte proxy: {url}")
             response = await zyte_api.details(url)

             # Guard clause: an empty result or a missing body counts as failure.
             if not response or "httpResponseBody" not in response:
                 logger.warning(f"Zyte proxy request failed for URL: {url}")
                 return None

             # The body arrives base64-encoded; decode it into raw bytes.
             import base64

             decoded_body = base64.b64decode(response["httpResponseBody"])
             logger.info(f"Successfully unblocked URL using Zyte proxy: {url}")
             return decoded_body
         except Exception as e:
             # Best-effort fallback: report the problem and signal failure with None.
             logger.error(f"Error unblocking URL with Zyte proxy: {url}, error: {e}")
             return None

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/fraudcrawler/base/orchestrator.py RENAMED Viewed

@@ -114,8 +114,13 @@ class Orchestrator(ABC):
             self._owns_http_client = True
         # Setup the clients
+        self._zyteapi = ZyteAPI(
+            http_client=self._http_client, api_key=self._zyteapi_key
+        )
         self._search = Search(
-            http_client=self._http_client, serpapi_key=self._serpapi_key, zyte_api=self._zyteapi
+            http_client=self._http_client,
+            serpapi_key=self._serpapi_key,
+            zyte_api=self._zyteapi,
         )
         self._enricher = Enricher(
             http_client=self._http_client,
@@ -123,9 +128,6 @@ class Orchestrator(ABC):
             pwd=self._dataforseo_pwd,
         )
         self._url_collector = URLCollector()
-        self._zyteapi = ZyteAPI(
-            http_client=self._http_client, api_key=self._zyteapi_key
-        )
         self._processor = Processor(
             http_client=self._http_client,
             api_key=self._openaiapi_key,

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/fraudcrawler/scraping/search.py RENAMED Viewed

@@ -450,7 +450,7 @@ class Toppreise(SearchEngine):
         retry.before_sleep = lambda retry_state: self._log_before_sleep(
             search_string=search_string, retry_state=retry_state
         )
         content = None
         try:
             async for attempt in retry:
@@ -463,7 +463,9 @@ class Toppreise(SearchEngine):
                     content = response.content
         except httpx.HTTPStatusError as e:
             if e.response.status_code == 403 and self._zyte_api:
-                logger.warning(f"Received 403 Forbidden for {url}, attempting to unblock with Zyte proxy")
+                logger.warning(
+                    f"Received 403 Forbidden for {url}, attempting to unblock with Zyte proxy"
+                )
                 content = await self._unblock_url(url, self._zyte_api)
                 if content is None:
                     raise e  # Re-raise if zyte fallback also failed
@@ -471,7 +473,7 @@ class Toppreise(SearchEngine):
                 raise e
         if content is None:
-            raise httpx.HTTPStatusError("Failed to fetch content", request=None, response=None)
+            raise httpx.HTTPError("Failed to fetch content")
         # Get external product urls from the content
         urls = self._get_external_product_urls(content=content)

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "fraudcrawler"
-version = "0.5.7"
+version = "0.5.8"
 description = "Intelligent Market Monitoring"
 authors = [
     "Domingo Bertus <hello@veanu.ch>",

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/LICENSE RENAMED Viewed

File without changes

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/README.md RENAMED Viewed

File without changes

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/fraudcrawler/__init__.py RENAMED Viewed

File without changes

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/fraudcrawler/base/__init__.py RENAMED Viewed

File without changes

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/fraudcrawler/base/client.py RENAMED Viewed

File without changes

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/fraudcrawler/base/google-languages.json RENAMED Viewed

File without changes

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/fraudcrawler/base/google-locations.json RENAMED Viewed

File without changes

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/fraudcrawler/base/retry.py RENAMED Viewed

File without changes

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/fraudcrawler/launch_demo_pipeline.py RENAMED Viewed

File without changes

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/fraudcrawler/processing/__init__.py RENAMED Viewed

File without changes

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/fraudcrawler/processing/processor.py RENAMED Viewed

File without changes

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/fraudcrawler/scraping/__init__.py RENAMED Viewed

File without changes

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/fraudcrawler/scraping/enrich.py RENAMED Viewed

File without changes

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/fraudcrawler/scraping/url.py RENAMED Viewed

File without changes

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/fraudcrawler/scraping/zyte.py RENAMED Viewed

File without changes

{fraudcrawler-0.5.7 → fraudcrawler-0.5.8}/fraudcrawler/settings.py RENAMED Viewed

File without changes

fraudcrawler 0.5.7__tar.gz → 0.5.8__tar.gz

Potentially problematic release.

fraudcrawler 0.5.7.tar.gz → 0.5.8.tar.gz