fraudcrawler 0.3.3__tar.gz → 0.3.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of fraudcrawler might be problematic.
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/PKG-INFO +2 -2
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/README.md +1 -1
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/base/base.py +12 -3
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/base/orchestrator.py +12 -4
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/processing/processor.py +16 -6
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/settings.py +0 -3
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/pyproject.toml +1 -1
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/LICENSE +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/__init__.py +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/base/__init__.py +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/base/client.py +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/base/google-languages.json +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/base/google-locations.json +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/launch_demo_pipeline.py +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/processing/__init__.py +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/scraping/__init__.py +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/scraping/enrich.py +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/scraping/serp.py +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/scraping/zyte.py +0 -0
{fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: fraudcrawler
-Version: 0.3.3
+Version: 0.3.5
 Summary: Intelligent Market Monitoring
 Home-page: https://github/open-veanu/fraudcrawler
 License: MIT

@@ -68,7 +68,7 @@ The location used in SerpAPI ('gl' parameter). `location=Location('Switzerland')
 Defines the search depth with the number of results to retrieve and optional enrichment parameters.
 
 #### `prompts: List[Prompt]`
-The list of prompts to classify a given product with (multiple) LLM calls. Each prompt object has a `name`, a `context` (used for defining the user prompt), a `system_prompt` (for defining the classification task), `allowed_classes` (a list of possible classes)
+The list of prompts to classify a given product with (multiple) LLM calls. Each prompt object has a `name`, a `context` (used for defining the user prompt), a `system_prompt` (for defining the classification task), and `allowed_classes` (a list of possible classes).
 
 ```python
 from fraudcrawler import Language, Location, Deepness, Prompt
{fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/README.md

@@ -46,7 +46,7 @@ The location used in SerpAPI ('gl' parameter). `location=Location('Switzerland')
 Defines the search depth with the number of results to retrieve and optional enrichment parameters.
 
 #### `prompts: List[Prompt]`
-The list of prompts to classify a given product with (multiple) LLM calls. Each prompt object has a `name`, a `context` (used for defining the user prompt), a `system_prompt` (for defining the classification task), `allowed_classes` (a list of possible classes)
+The list of prompts to classify a given product with (multiple) LLM calls. Each prompt object has a `name`, a `context` (used for defining the user prompt), a `system_prompt` (for defining the classification task), and `allowed_classes` (a list of possible classes).
 
 ```python
 from fraudcrawler import Language, Location, Deepness, Prompt
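As a hedged illustration of the `Prompt` object described in this README change, a prompt might be constructed as follows; the string values and the variable name are invented for the example, while the field names (`name`, `context`, `system_prompt`, `allowed_classes`) and the import path come from the README and `base.py`:

```python
from fraudcrawler import Prompt

# Example values only; the field names follow the README description above.
relevance_prompt = Prompt(
    name="relevance",
    context="Products that may be illegally marketed as medical devices.",
    system_prompt=(
        "Classify the product as 1 if it matches the context, otherwise 0. "
        "Answer with a single integer."
    ),
    allowed_classes=[0, 1],
)
```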
{fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/base/base.py

@@ -1,6 +1,10 @@
 import json
 import logging
-from pydantic import
+from pydantic import (
+    BaseModel,
+    field_validator,
+    model_validator,
+)
 from pydantic_settings import BaseSettings
 from typing import List
 

@@ -9,7 +13,6 @@ import aiohttp
 from fraudcrawler.settings import (
     GOOGLE_LANGUAGES_FILENAME,
     GOOGLE_LOCATIONS_FILENAME,
-    PROCESSOR_DEFAULT_IF_MISSING,
 )
 
 logger = logging.getLogger(__name__)

@@ -111,7 +114,13 @@ class Prompt(BaseModel):
     context: str
     system_prompt: str
     allowed_classes: List[int]
-
+
+    @field_validator("allowed_classes", mode="before")
+    def check_for_positive_value(cls, val):
+        """Check if all values are positive."""
+        if not all(isinstance(i, int) and i >= 0 for i in val):
+            raise ValueError("all values in allowed_classes must be positive integers.")
+        return val
 
 
 class AsyncClient:
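With the new `field_validator`, a `Prompt` whose `allowed_classes` contains a negative value is now rejected at construction time. A minimal sketch of that behavior, with invented field values:

```python
from pydantic import ValidationError

from fraudcrawler import Prompt

try:
    Prompt(
        name="broken",
        context="Example context.",
        system_prompt="Classify the product.",
        allowed_classes=[-1, 1],  # the -1 trips check_for_positive_value
    )
except ValidationError as exc:
    print(exc)  # "all values in allowed_classes must be positive integers."
```

Note that the check uses `i >= 0`, so `0` is still accepted even though the error message says "positive".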
{fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/base/orchestrator.py

@@ -4,13 +4,17 @@ import logging
 from pydantic import BaseModel, Field
 from typing import Dict, List, Set, cast
 
-from fraudcrawler.settings import
+from fraudcrawler.settings import (
+    PROCESSOR_DEFAULT_MODEL,
+    PROCESSOR_DEFAULT_IF_MISSING,
+    MAX_RETRIES,
+    RETRY_DELAY,
+)
 from fraudcrawler.settings import (
     DEFAULT_N_SERP_WKRS,
     DEFAULT_N_ZYTE_WKRS,
     DEFAULT_N_PROC_WKRS,
 )
-from fraudcrawler.settings import PRODUCT_ITEM_DEFAULT_IS_RELEVANT
 from fraudcrawler.base.base import Deepness, Host, Language, Location, Prompt
 from fraudcrawler import SerpApi, Enricher, ZyteApi, Processor
 

@@ -40,7 +44,6 @@ class ProductItem(BaseModel):
     # Filtering parameters
     filtered: bool = False
     filtered_at_stage: str | None = None
-    is_relevant: int = PRODUCT_ITEM_DEFAULT_IS_RELEVANT
 
 
 class Orchestrator(ABC):

@@ -69,6 +72,7 @@ class Orchestrator(ABC):
         openai_model: str = PROCESSOR_DEFAULT_MODEL,
         max_retries: int = MAX_RETRIES,
         retry_delay: int = RETRY_DELAY,
+        default_if_missing: int = PROCESSOR_DEFAULT_IF_MISSING,
         n_serp_wkrs: int = DEFAULT_N_SERP_WKRS,
         n_zyte_wkrs: int = DEFAULT_N_ZYTE_WKRS,
         n_proc_wkrs: int = DEFAULT_N_PROC_WKRS,

@@ -100,7 +104,11 @@ class Orchestrator(ABC):
         self._zyteapi = ZyteApi(
             api_key=zyteapi_key, max_retries=max_retries, retry_delay=retry_delay
         )
-        self._processor = Processor(
+        self._processor = Processor(
+            api_key=openaiapi_key,
+            model=openai_model,
+            default_if_missing=default_if_missing,
+        )
 
         # Setup the async framework
         self._n_serp_wkrs = n_serp_wkrs
{fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/processing/processor.py

@@ -3,7 +3,10 @@ import logging
 from openai import AsyncOpenAI
 
 from fraudcrawler.base.base import Prompt
-from fraudcrawler.settings import
+from fraudcrawler.settings import (
+    PROCESSOR_USER_PROMPT_TEMPLATE,
+    PROCESSOR_DEFAULT_IF_MISSING,
+)
 
 
 logger = logging.getLogger(__name__)

@@ -12,15 +15,22 @@ logger = logging.getLogger(__name__)
 class Processor:
     """Processes product data for classification based on a prompt configuration."""
 
-    def __init__(
+    def __init__(
+        self,
+        api_key: str,
+        model: str,
+        default_if_missing: int = PROCESSOR_DEFAULT_IF_MISSING,
+    ):
         """Initializes the Processor.
 
         Args:
             api_key: The OpenAI API key.
             model: The OpenAI model to use.
+            default_if_missing: The default classification to return if error occurs.
         """
         self._client = AsyncOpenAI(api_key=api_key)
         self._model = model
+        self._default_if_missing = default_if_missing
 
     async def _call_openai_api(
         self,

@@ -54,7 +64,7 @@ class Processor:
             description: Product description (often used in the user_prompt).
 
         Note:
-            This method returns `
+            This method returns `PROCESSOR_DEFAULT_IF_MISSING` if:
             - 'name' or 'description' is None
             - an error occurs during the API call
             - if the response isn't in allowed_classes.

@@ -64,7 +74,7 @@ class Processor:
             logger.warning(
                 f"Missing required fields for classification: name='{name}', description='{description}'"
             )
-            return
+            return self._default_if_missing
 
         # Substitute placeholders in user_prompt with the relevant arguments
         user_prompt = PROCESSOR_USER_PROMPT_TEMPLATE.format(

@@ -91,7 +101,7 @@ class Processor:
             logger.warning(
                 f"Classification '{classification}' not in allowed classes {prompt.allowed_classes}"
             )
-            return
+            return self._default_if_missing
 
             logger.info(
                 f'Classification for "{name}" (prompt={prompt.name}): {classification}'

@@ -102,4 +112,4 @@ class Processor:
             logger.error(
                 f'Error classifying product "{name}" with prompt "{prompt.name}": {e}'
             )
-            return
+            return self._default_if_missing
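Taken together, these processor changes route every fallback through a configurable `default_if_missing`, which the `Orchestrator` constructor now forwards to the `Processor`. A hedged usage sketch; the API key and model name below are placeholders, while the keyword arguments follow the 0.3.5 `__init__` signature shown above:

```python
from fraudcrawler import Processor

processor = Processor(
    api_key="sk-...",      # placeholder OpenAI API key
    model="gpt-4o-mini",   # placeholder model name
    # Returned when name/description is missing, the API call raises,
    # or the response is not in the prompt's allowed_classes.
    default_if_missing=0,
)
```

If `default_if_missing` is omitted, it falls back to `PROCESSOR_DEFAULT_IF_MISSING` from `fraudcrawler.settings`.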
{fraudcrawler-0.3.3 → fraudcrawler-0.3.5}/fraudcrawler/settings.py

@@ -22,9 +22,6 @@ PROCESSOR_USER_PROMPT_TEMPLATE = (
     "Context: {context}\n\nProduct Details: {name}\n{description}\\n\nRelevance:"
 )
 
-# Orchestrator settings
-PRODUCT_ITEM_DEFAULT_IS_RELEVANT = -1
-
 # Async settings
 DEFAULT_N_SERP_WKRS = 10
 DEFAULT_N_ZYTE_WKRS = 10
All other files listed above with +0 -0 are unchanged between 0.3.3 and 0.3.5.