fraudcrawler 0.3.3__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fraudcrawler might be problematic. Click here for more details.
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/PKG-INFO +2 -2
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/README.md +1 -1
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/base/base.py +12 -3
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/base/orchestrator.py +0 -2
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/processing/processor.py +8 -5
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/settings.py +0 -3
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/pyproject.toml +1 -1
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/LICENSE +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/__init__.py +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/base/__init__.py +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/base/client.py +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/base/google-languages.json +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/base/google-locations.json +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/launch_demo_pipeline.py +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/processing/__init__.py +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/scraping/__init__.py +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/scraping/enrich.py +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/scraping/serp.py +0 -0
- {fraudcrawler-0.3.3 → fraudcrawler-0.3.4}/fraudcrawler/scraping/zyte.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: fraudcrawler
|
|
3
|
-
Version: 0.3.3
|
|
3
|
+
Version: 0.3.4
|
|
4
4
|
Summary: Intelligent Market Monitoring
|
|
5
5
|
Home-page: https://github/open-veanu/fraudcrawler
|
|
6
6
|
License: MIT
|
|
@@ -68,7 +68,7 @@ The location used in SerpAPI ('gl' parameter). `location=Location('Switzerland')
|
|
|
68
68
|
Defines the search depth with the number of results to retrieve and optional enrichment parameters.
|
|
69
69
|
|
|
70
70
|
#### `prompts: List[Prompt]`
|
|
71
|
-
The list of prompts to classify a given product with (multiple) LLM calls. Each prompt object has a `name`, a `context` (used for defining the user prompt), a `system_prompt` (for defining the classification task), `allowed_classes` (a list of possible classes)
|
|
71
|
+
The list of prompts to classify a given product with (multiple) LLM calls. Each prompt object has a `name`, a `context` (used for defining the user prompt), a `system_prompt` (for defining the classification task), and `allowed_classes` (a list of possible classes).
|
|
72
72
|
|
|
73
73
|
```python
|
|
74
74
|
from fraudcrawler import Language, Location, Deepness, Prompt
|
|
@@ -46,7 +46,7 @@ The location used in SerpAPI ('gl' parameter). `location=Location('Switzerland')
|
|
|
46
46
|
Defines the search depth with the number of results to retrieve and optional enrichment parameters.
|
|
47
47
|
|
|
48
48
|
#### `prompts: List[Prompt]`
|
|
49
|
-
The list of prompts to classify a given product with (multiple) LLM calls. Each prompt object has a `name`, a `context` (used for defining the user prompt), a `system_prompt` (for defining the classification task), `allowed_classes` (a list of possible classes)
|
|
49
|
+
The list of prompts to classify a given product with (multiple) LLM calls. Each prompt object has a `name`, a `context` (used for defining the user prompt), a `system_prompt` (for defining the classification task), and `allowed_classes` (a list of possible classes).
|
|
50
50
|
|
|
51
51
|
```python
|
|
52
52
|
from fraudcrawler import Language, Location, Deepness, Prompt
|
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
-
from pydantic import
|
|
3
|
+
from pydantic import (
|
|
4
|
+
BaseModel,
|
|
5
|
+
field_validator,
|
|
6
|
+
model_validator,
|
|
7
|
+
)
|
|
4
8
|
from pydantic_settings import BaseSettings
|
|
5
9
|
from typing import List
|
|
6
10
|
|
|
@@ -9,7 +13,6 @@ import aiohttp
|
|
|
9
13
|
from fraudcrawler.settings import (
|
|
10
14
|
GOOGLE_LANGUAGES_FILENAME,
|
|
11
15
|
GOOGLE_LOCATIONS_FILENAME,
|
|
12
|
-
PROCESSOR_DEFAULT_IF_MISSING,
|
|
13
16
|
)
|
|
14
17
|
|
|
15
18
|
logger = logging.getLogger(__name__)
|
|
@@ -111,7 +114,13 @@ class Prompt(BaseModel):
|
|
|
111
114
|
context: str
|
|
112
115
|
system_prompt: str
|
|
113
116
|
allowed_classes: List[int]
|
|
114
|
-
|
|
117
|
+
|
|
118
|
+
@field_validator("allowed_classes", mode="before")
|
|
119
|
+
def check_for_positive_value(cls, val):
|
|
120
|
+
"""Check if all values are positive."""
|
|
121
|
+
if not all(isinstance(i, int) and i >= 0 for i in val):
|
|
122
|
+
raise ValueError("all values in allowed_classes must be positive integers.")
|
|
123
|
+
return val
|
|
115
124
|
|
|
116
125
|
|
|
117
126
|
class AsyncClient:
|
|
@@ -10,7 +10,6 @@ from fraudcrawler.settings import (
|
|
|
10
10
|
DEFAULT_N_ZYTE_WKRS,
|
|
11
11
|
DEFAULT_N_PROC_WKRS,
|
|
12
12
|
)
|
|
13
|
-
from fraudcrawler.settings import PRODUCT_ITEM_DEFAULT_IS_RELEVANT
|
|
14
13
|
from fraudcrawler.base.base import Deepness, Host, Language, Location, Prompt
|
|
15
14
|
from fraudcrawler import SerpApi, Enricher, ZyteApi, Processor
|
|
16
15
|
|
|
@@ -40,7 +39,6 @@ class ProductItem(BaseModel):
|
|
|
40
39
|
# Filtering parameters
|
|
41
40
|
filtered: bool = False
|
|
42
41
|
filtered_at_stage: str | None = None
|
|
43
|
-
is_relevant: int = PRODUCT_ITEM_DEFAULT_IS_RELEVANT
|
|
44
42
|
|
|
45
43
|
|
|
46
44
|
class Orchestrator(ABC):
|
|
@@ -3,7 +3,10 @@ import logging
|
|
|
3
3
|
from openai import AsyncOpenAI
|
|
4
4
|
|
|
5
5
|
from fraudcrawler.base.base import Prompt
|
|
6
|
-
from fraudcrawler.settings import
|
|
6
|
+
from fraudcrawler.settings import (
|
|
7
|
+
PROCESSOR_USER_PROMPT_TEMPLATE,
|
|
8
|
+
PROCESSOR_DEFAULT_IF_MISSING,
|
|
9
|
+
)
|
|
7
10
|
|
|
8
11
|
|
|
9
12
|
logger = logging.getLogger(__name__)
|
|
@@ -54,7 +57,7 @@ class Processor:
|
|
|
54
57
|
description: Product description (often used in the user_prompt).
|
|
55
58
|
|
|
56
59
|
Note:
|
|
57
|
-
This method returns `
|
|
60
|
+
This method returns `PROCESSOR_DEFAULT_IF_MISSING` if:
|
|
58
61
|
- 'name' or 'description' is None
|
|
59
62
|
- an error occurs during the API call
|
|
60
63
|
- if the response isn't in allowed_classes.
|
|
@@ -64,7 +67,7 @@ class Processor:
|
|
|
64
67
|
logger.warning(
|
|
65
68
|
f"Missing required fields for classification: name='{name}', description='{description}'"
|
|
66
69
|
)
|
|
67
|
-
return
|
|
70
|
+
return PROCESSOR_DEFAULT_IF_MISSING
|
|
68
71
|
|
|
69
72
|
# Substitute placeholders in user_prompt with the relevant arguments
|
|
70
73
|
user_prompt = PROCESSOR_USER_PROMPT_TEMPLATE.format(
|
|
@@ -91,7 +94,7 @@ class Processor:
|
|
|
91
94
|
logger.warning(
|
|
92
95
|
f"Classification '{classification}' not in allowed classes {prompt.allowed_classes}"
|
|
93
96
|
)
|
|
94
|
-
return
|
|
97
|
+
return PROCESSOR_DEFAULT_IF_MISSING
|
|
95
98
|
|
|
96
99
|
logger.info(
|
|
97
100
|
f'Classification for "{name}" (prompt={prompt.name}): {classification}'
|
|
@@ -102,4 +105,4 @@ class Processor:
|
|
|
102
105
|
logger.error(
|
|
103
106
|
f'Error classifying product "{name}" with prompt "{prompt.name}": {e}'
|
|
104
107
|
)
|
|
105
|
-
return
|
|
108
|
+
return PROCESSOR_DEFAULT_IF_MISSING
|
|
@@ -22,9 +22,6 @@ PROCESSOR_USER_PROMPT_TEMPLATE = (
|
|
|
22
22
|
"Context: {context}\n\nProduct Details: {name}\n{description}\\n\nRelevance:"
|
|
23
23
|
)
|
|
24
24
|
|
|
25
|
-
# Orchestrator settings
|
|
26
|
-
PRODUCT_ITEM_DEFAULT_IS_RELEVANT = -1
|
|
27
|
-
|
|
28
25
|
# Async settings
|
|
29
26
|
DEFAULT_N_SERP_WKRS = 10
|
|
30
27
|
DEFAULT_N_ZYTE_WKRS = 10
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|