fraudcrawler 0.3.4__tar.gz → 0.3.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fraudcrawler might be problematic. Click here for more details.
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/PKG-INFO +1 -1
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/fraudcrawler/base/orchestrator.py +12 -2
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/fraudcrawler/processing/processor.py +11 -4
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/pyproject.toml +1 -1
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/LICENSE +0 -0
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/README.md +0 -0
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/fraudcrawler/__init__.py +0 -0
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/fraudcrawler/base/__init__.py +0 -0
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/fraudcrawler/base/base.py +0 -0
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/fraudcrawler/base/client.py +0 -0
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/fraudcrawler/base/google-languages.json +0 -0
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/fraudcrawler/base/google-locations.json +0 -0
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/fraudcrawler/launch_demo_pipeline.py +0 -0
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/fraudcrawler/processing/__init__.py +0 -0
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/fraudcrawler/scraping/__init__.py +0 -0
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/fraudcrawler/scraping/enrich.py +0 -0
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/fraudcrawler/scraping/serp.py +0 -0
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/fraudcrawler/scraping/zyte.py +0 -0
- {fraudcrawler-0.3.4 → fraudcrawler-0.3.5}/fraudcrawler/settings.py +0 -0
|
@@ -4,7 +4,12 @@ import logging
|
|
|
4
4
|
from pydantic import BaseModel, Field
|
|
5
5
|
from typing import Dict, List, Set, cast
|
|
6
6
|
|
|
7
|
-
from fraudcrawler.settings import PROCESSOR_DEFAULT_MODEL, MAX_RETRIES, RETRY_DELAY
|
|
7
|
+
from fraudcrawler.settings import (
|
|
8
|
+
PROCESSOR_DEFAULT_MODEL,
|
|
9
|
+
PROCESSOR_DEFAULT_IF_MISSING,
|
|
10
|
+
MAX_RETRIES,
|
|
11
|
+
RETRY_DELAY,
|
|
12
|
+
)
|
|
8
13
|
from fraudcrawler.settings import (
|
|
9
14
|
DEFAULT_N_SERP_WKRS,
|
|
10
15
|
DEFAULT_N_ZYTE_WKRS,
|
|
@@ -67,6 +72,7 @@ class Orchestrator(ABC):
|
|
|
67
72
|
openai_model: str = PROCESSOR_DEFAULT_MODEL,
|
|
68
73
|
max_retries: int = MAX_RETRIES,
|
|
69
74
|
retry_delay: int = RETRY_DELAY,
|
|
75
|
+
default_if_missing: int = PROCESSOR_DEFAULT_IF_MISSING,
|
|
70
76
|
n_serp_wkrs: int = DEFAULT_N_SERP_WKRS,
|
|
71
77
|
n_zyte_wkrs: int = DEFAULT_N_ZYTE_WKRS,
|
|
72
78
|
n_proc_wkrs: int = DEFAULT_N_PROC_WKRS,
|
|
@@ -98,7 +104,11 @@ class Orchestrator(ABC):
|
|
|
98
104
|
self._zyteapi = ZyteApi(
|
|
99
105
|
api_key=zyteapi_key, max_retries=max_retries, retry_delay=retry_delay
|
|
100
106
|
)
|
|
101
|
-
self._processor = Processor(api_key=openaiapi_key, model=openai_model)
|
|
107
|
+
self._processor = Processor(
|
|
108
|
+
api_key=openaiapi_key,
|
|
109
|
+
model=openai_model,
|
|
110
|
+
default_if_missing=default_if_missing,
|
|
111
|
+
)
|
|
102
112
|
|
|
103
113
|
# Setup the async framework
|
|
104
114
|
self._n_serp_wkrs = n_serp_wkrs
|
|
@@ -15,15 +15,22 @@ logger = logging.getLogger(__name__)
|
|
|
15
15
|
class Processor:
|
|
16
16
|
"""Processes product data for classification based on a prompt configuration."""
|
|
17
17
|
|
|
18
|
-
def __init__(self, api_key: str, model: str):
|
|
18
|
+
def __init__(
|
|
19
|
+
self,
|
|
20
|
+
api_key: str,
|
|
21
|
+
model: str,
|
|
22
|
+
default_if_missing: int = PROCESSOR_DEFAULT_IF_MISSING,
|
|
23
|
+
):
|
|
19
24
|
"""Initializes the Processor.
|
|
20
25
|
|
|
21
26
|
Args:
|
|
22
27
|
api_key: The OpenAI API key.
|
|
23
28
|
model: The OpenAI model to use.
|
|
29
|
+
default_if_missing: The default classification to return if error occurs.
|
|
24
30
|
"""
|
|
25
31
|
self._client = AsyncOpenAI(api_key=api_key)
|
|
26
32
|
self._model = model
|
|
33
|
+
self._default_if_missing = default_if_missing
|
|
27
34
|
|
|
28
35
|
async def _call_openai_api(
|
|
29
36
|
self,
|
|
@@ -67,7 +74,7 @@ class Processor:
|
|
|
67
74
|
logger.warning(
|
|
68
75
|
f"Missing required fields for classification: name='{name}', description='{description}'"
|
|
69
76
|
)
|
|
70
|
-
return
|
|
77
|
+
return self._default_if_missing
|
|
71
78
|
|
|
72
79
|
# Substitute placeholders in user_prompt with the relevant arguments
|
|
73
80
|
user_prompt = PROCESSOR_USER_PROMPT_TEMPLATE.format(
|
|
@@ -94,7 +101,7 @@ class Processor:
|
|
|
94
101
|
logger.warning(
|
|
95
102
|
f"Classification '{classification}' not in allowed classes {prompt.allowed_classes}"
|
|
96
103
|
)
|
|
97
|
-
return
|
|
104
|
+
return self._default_if_missing
|
|
98
105
|
|
|
99
106
|
logger.info(
|
|
100
107
|
f'Classification for "{name}" (prompt={prompt.name}): {classification}'
|
|
@@ -105,4 +112,4 @@ class Processor:
|
|
|
105
112
|
logger.error(
|
|
106
113
|
f'Error classifying product "{name}" with prompt "{prompt.name}": {e}'
|
|
107
114
|
)
|
|
108
|
-
return
|
|
115
|
+
return self._default_if_missing
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|