fraudcrawler 0.3.3__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fraudcrawler might be problematic. (The link to further details was not preserved in this text export.)

Files changed (19)
  1. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/PKG-INFO +2 -2
  2. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/README.md +1 -1
  3. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/base/base.py +12 -3
  4. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/base/orchestrator.py +0 -2
  5. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/processing/processor.py +8 -5
  6. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/settings.py +0 -3
  7. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/pyproject.toml +1 -1
  8. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/LICENSE +0 -0
  9. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/__init__.py +0 -0
  10. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/base/__init__.py +0 -0
  11. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/base/client.py +0 -0
  12. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/base/google-languages.json +0 -0
  13. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/base/google-locations.json +0 -0
  14. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/launch_demo_pipeline.py +0 -0
  15. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/processing/__init__.py +0 -0
  16. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/scraping/__init__.py +0 -0
  17. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/scraping/enrich.py +0 -0
  18. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/scraping/serp.py +0 -0
  19. {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/scraping/zyte.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: fraudcrawler
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: Intelligent Market Monitoring
5
5
  Home-page: https://github/open-veanu/fraudcrawler
6
6
  License: MIT
@@ -68,7 +68,7 @@ The location used in SerpAPI ('gl' parameter). `location=Location('Switzerland')
68
68
  Defines the search depth with the number of results to retrieve and optional enrichment parameters.
69
69
 
70
70
  #### `prompts: List[Prompt]`
71
- The list of prompts to classify a given product with (multiple) LLM calls. Each prompt object has a `name`, a `context` (used for defining the user prompt), a `system_prompt` (for defining the classification task), `allowed_classes` (a list of possible classes) and optionally `default_if_missing` (a default class if anything goes wrong).
71
+ The list of prompts to classify a given product with (multiple) LLM calls. Each prompt object has a `name`, a `context` (used for defining the user prompt), a `system_prompt` (for defining the classification task), and `allowed_classes` (a list of possible classes).
72
72
 
73
73
  ```python
74
74
  from fraudcrawler import Language, Location, Deepness, Prompt
@@ -46,7 +46,7 @@ The location used in SerpAPI ('gl' parameter). `location=Location('Switzerland')
46
46
  Defines the search depth with the number of results to retrieve and optional enrichment parameters.
47
47
 
48
48
  #### `prompts: List[Prompt]`
49
- The list of prompts to classify a given product with (multiple) LLM calls. Each prompt object has a `name`, a `context` (used for defining the user prompt), a `system_prompt` (for defining the classification task), `allowed_classes` (a list of possible classes) and optionally `default_if_missing` (a default class if anything goes wrong).
49
+ The list of prompts to classify a given product with (multiple) LLM calls. Each prompt object has a `name`, a `context` (used for defining the user prompt), a `system_prompt` (for defining the classification task), and `allowed_classes` (a list of possible classes).
50
50
 
51
51
  ```python
52
52
  from fraudcrawler import Language, Location, Deepness, Prompt
@@ -1,6 +1,10 @@
1
1
  import json
2
2
  import logging
3
- from pydantic import BaseModel, field_validator, model_validator
3
+ from pydantic import (
4
+ BaseModel,
5
+ field_validator,
6
+ model_validator,
7
+ )
4
8
  from pydantic_settings import BaseSettings
5
9
  from typing import List
6
10
 
@@ -9,7 +13,6 @@ import aiohttp
9
13
  from fraudcrawler.settings import (
10
14
  GOOGLE_LANGUAGES_FILENAME,
11
15
  GOOGLE_LOCATIONS_FILENAME,
12
- PROCESSOR_DEFAULT_IF_MISSING,
13
16
  )
14
17
 
15
18
  logger = logging.getLogger(__name__)
@@ -111,7 +114,13 @@ class Prompt(BaseModel):
111
114
  context: str
112
115
  system_prompt: str
113
116
  allowed_classes: List[int]
114
- default_if_missing: int = PROCESSOR_DEFAULT_IF_MISSING
117
+
118
+ @field_validator("allowed_classes", mode="before")
119
+ def check_for_positive_value(cls, val):
120
+ """Check if all values are positive."""
121
+ if not all(isinstance(i, int) and i >= 0 for i in val):
122
+ raise ValueError("all values in allowed_classes must be positive integers.")
123
+ return val
115
124
 
116
125
 
117
126
  class AsyncClient:
@@ -10,7 +10,6 @@ from fraudcrawler.settings import (
10
10
  DEFAULT_N_ZYTE_WKRS,
11
11
  DEFAULT_N_PROC_WKRS,
12
12
  )
13
- from fraudcrawler.settings import PRODUCT_ITEM_DEFAULT_IS_RELEVANT
14
13
  from fraudcrawler.base.base import Deepness, Host, Language, Location, Prompt
15
14
  from fraudcrawler import SerpApi, Enricher, ZyteApi, Processor
16
15
 
@@ -40,7 +39,6 @@ class ProductItem(BaseModel):
40
39
  # Filtering parameters
41
40
  filtered: bool = False
42
41
  filtered_at_stage: str | None = None
43
- is_relevant: int = PRODUCT_ITEM_DEFAULT_IS_RELEVANT
44
42
 
45
43
 
46
44
  class Orchestrator(ABC):
@@ -3,7 +3,10 @@ import logging
3
3
  from openai import AsyncOpenAI
4
4
 
5
5
  from fraudcrawler.base.base import Prompt
6
- from fraudcrawler.settings import PROCESSOR_USER_PROMPT_TEMPLATE
6
+ from fraudcrawler.settings import (
7
+ PROCESSOR_USER_PROMPT_TEMPLATE,
8
+ PROCESSOR_DEFAULT_IF_MISSING,
9
+ )
7
10
 
8
11
 
9
12
  logger = logging.getLogger(__name__)
@@ -54,7 +57,7 @@ class Processor:
54
57
  description: Product description (often used in the user_prompt).
55
58
 
56
59
  Note:
57
- This method returns `prompt.default_if_missing` if:
60
+ This method returns `PROCESSOR_DEFAULT_IF_MISSING` if:
58
61
  - 'name' or 'description' is None
59
62
  - an error occurs during the API call
60
63
  - if the response isn't in allowed_classes.
@@ -64,7 +67,7 @@ class Processor:
64
67
  logger.warning(
65
68
  f"Missing required fields for classification: name='{name}', description='{description}'"
66
69
  )
67
- return prompt.default_if_missing
70
+ return PROCESSOR_DEFAULT_IF_MISSING
68
71
 
69
72
  # Substitute placeholders in user_prompt with the relevant arguments
70
73
  user_prompt = PROCESSOR_USER_PROMPT_TEMPLATE.format(
@@ -91,7 +94,7 @@ class Processor:
91
94
  logger.warning(
92
95
  f"Classification '{classification}' not in allowed classes {prompt.allowed_classes}"
93
96
  )
94
- return prompt.default_if_missing
97
+ return PROCESSOR_DEFAULT_IF_MISSING
95
98
 
96
99
  logger.info(
97
100
  f'Classification for "{name}" (prompt={prompt.name}): {classification}'
@@ -102,4 +105,4 @@ class Processor:
102
105
  logger.error(
103
106
  f'Error classifying product "{name}" with prompt "{prompt.name}": {e}'
104
107
  )
105
- return prompt.default_if_missing
108
+ return PROCESSOR_DEFAULT_IF_MISSING
@@ -22,9 +22,6 @@ PROCESSOR_USER_PROMPT_TEMPLATE = (
22
22
  "Context: {context}\n\nProduct Details: {name}\n{description}\\n\nRelevance:"
23
23
  )
24
24
 
25
- # Orchestrator settings
26
- PRODUCT_ITEM_DEFAULT_IS_RELEVANT = -1
27
-
28
25
  # Async settings
29
26
  DEFAULT_N_SERP_WKRS = 10
30
27
  DEFAULT_N_ZYTE_WKRS = 10
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "fraudcrawler"
7
- version = "0.3.3"
7
+ version = "0.3.4"
8
8
  description = "Intelligent Market Monitoring"
9
9
  authors = [
10
10
  "Domingo Bertus <hello@veanu.ch>",
File without changes