fraudcrawler-0.3.4-py3-none-any.whl → fraudcrawler-0.3.6-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fraudcrawler might be problematic. Click here for more details.

@@ -4,7 +4,12 @@ import logging
4
4
  from pydantic import BaseModel, Field
5
5
  from typing import Dict, List, Set, cast
6
6
 
7
- from fraudcrawler.settings import PROCESSOR_DEFAULT_MODEL, MAX_RETRIES, RETRY_DELAY
7
+ from fraudcrawler.settings import (
8
+ PROCESSOR_DEFAULT_MODEL,
9
+ PROCESSOR_DEFAULT_IF_MISSING,
10
+ MAX_RETRIES,
11
+ RETRY_DELAY,
12
+ )
8
13
  from fraudcrawler.settings import (
9
14
  DEFAULT_N_SERP_WKRS,
10
15
  DEFAULT_N_ZYTE_WKRS,
@@ -67,6 +72,7 @@ class Orchestrator(ABC):
67
72
  openai_model: str = PROCESSOR_DEFAULT_MODEL,
68
73
  max_retries: int = MAX_RETRIES,
69
74
  retry_delay: int = RETRY_DELAY,
75
+ default_if_missing: int = PROCESSOR_DEFAULT_IF_MISSING,
70
76
  n_serp_wkrs: int = DEFAULT_N_SERP_WKRS,
71
77
  n_zyte_wkrs: int = DEFAULT_N_ZYTE_WKRS,
72
78
  n_proc_wkrs: int = DEFAULT_N_PROC_WKRS,
@@ -98,7 +104,11 @@ class Orchestrator(ABC):
98
104
  self._zyteapi = ZyteApi(
99
105
  api_key=zyteapi_key, max_retries=max_retries, retry_delay=retry_delay
100
106
  )
101
- self._processor = Processor(api_key=openaiapi_key, model=openai_model)
107
+ self._processor = Processor(
108
+ api_key=openaiapi_key,
109
+ model=openai_model,
110
+ default_if_missing=default_if_missing,
111
+ )
102
112
 
103
113
  # Setup the async framework
104
114
  self._n_serp_wkrs = n_serp_wkrs
@@ -15,15 +15,22 @@ logger = logging.getLogger(__name__)
15
15
  class Processor:
16
16
  """Processes product data for classification based on a prompt configuration."""
17
17
 
18
- def __init__(self, api_key: str, model: str):
18
+ def __init__(
19
+ self,
20
+ api_key: str,
21
+ model: str,
22
+ default_if_missing: int = PROCESSOR_DEFAULT_IF_MISSING,
23
+ ):
19
24
  """Initializes the Processor.
20
25
 
21
26
  Args:
22
27
  api_key: The OpenAI API key.
23
28
  model: The OpenAI model to use.
29
+ default_if_missing: The default classification to return if error occurs.
24
30
  """
25
31
  self._client = AsyncOpenAI(api_key=api_key)
26
32
  self._model = model
33
+ self._default_if_missing = default_if_missing
27
34
 
28
35
  async def _call_openai_api(
29
36
  self,
@@ -67,7 +74,7 @@ class Processor:
67
74
  logger.warning(
68
75
  f"Missing required fields for classification: name='{name}', description='{description}'"
69
76
  )
70
- return PROCESSOR_DEFAULT_IF_MISSING
77
+ return self._default_if_missing
71
78
 
72
79
  # Substitute placeholders in user_prompt with the relevant arguments
73
80
  user_prompt = PROCESSOR_USER_PROMPT_TEMPLATE.format(
@@ -94,7 +101,7 @@ class Processor:
94
101
  logger.warning(
95
102
  f"Classification '{classification}' not in allowed classes {prompt.allowed_classes}"
96
103
  )
97
- return PROCESSOR_DEFAULT_IF_MISSING
104
+ return self._default_if_missing
98
105
 
99
106
  logger.info(
100
107
  f'Classification for "{name}" (prompt={prompt.name}): {classification}'
@@ -105,4 +112,4 @@ class Processor:
105
112
  logger.error(
106
113
  f'Error classifying product "{name}" with prompt "{prompt.name}": {e}'
107
114
  )
108
- return PROCESSOR_DEFAULT_IF_MISSING
115
+ return self._default_if_missing
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: fraudcrawler
3
- Version: 0.3.4
3
+ Version: 0.3.6
4
4
  Summary: Intelligent Market Monitoring
5
- Home-page: https://github/open-veanu/fraudcrawler
5
+ Home-page: https://github.com/open-veanu/fraudcrawler
6
6
  License: MIT
7
7
  Author: Domingo Bertus
8
8
  Author-email: hello@veanu.ch
@@ -17,7 +17,7 @@ Requires-Dist: openai (>=1.68.2,<2.0.0)
17
17
  Requires-Dist: pandas (>=2.2.3,<3.0.0)
18
18
  Requires-Dist: pydantic-settings (>=2.8.1,<3.0.0)
19
19
  Requires-Dist: requests (>=2.32.3,<3.0.0)
20
- Project-URL: Repository, https://github/open-veanu/fraudcrawler
20
+ Project-URL: Repository, https://github.com/open-veanu/fraudcrawler
21
21
  Description-Content-Type: text/markdown
22
22
 
23
23
  # open-veanu/fraudcrawler
@@ -4,17 +4,17 @@ fraudcrawler/base/base.py,sha256=KnwOcy35EKyelcgVh95LmOZziWFS6dKlegLK6A96wvg,448
4
4
  fraudcrawler/base/client.py,sha256=GcTUMqLfvweLFdHy6CP9tgxsFQiPkc6KyiLcwLnDiw8,4412
5
5
  fraudcrawler/base/google-languages.json,sha256=z0VtjMCsCcZq11OkCIb9jMDD1p9Ty4lhV7bq4ddYvec,10748
6
6
  fraudcrawler/base/google-locations.json,sha256=UtNu0iSStllvFRTQXMobWKmZR1hKmtgtHftLNgaJTT0,9204345
7
- fraudcrawler/base/orchestrator.py,sha256=la4BgzhzX0KCkEQhtvueT9iJ4RRZR-YG3BMxG58Ko-I,23780
7
+ fraudcrawler/base/orchestrator.py,sha256=Gmryv8l8nB1QUwwjLoZGop2mwKqWYQQORT_96_w5ptA,23981
8
8
  fraudcrawler/launch_demo_pipeline.py,sha256=RIZTtdtZeJPhvSLp1IUjT_nhme_2q6mAGWKoL838E4E,4320
9
9
  fraudcrawler/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- fraudcrawler/processing/processor.py,sha256=Z1pr2X7GEpxlB-mmGIIC72qTuRW5YJU1DQpKsQO_lAs,3770
10
+ fraudcrawler/processing/processor.py,sha256=IFVKIiNi0QoCAgPFkFtNDgxfhh01iDNUyIBZWACplR8,3993
11
11
  fraudcrawler/scraping/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  fraudcrawler/scraping/enrich.py,sha256=X1BBZshdZqPmbduzhGwH0ULSzq03L_7bf7_UL8yOQ9E,10608
13
13
  fraudcrawler/scraping/serp.py,sha256=wT8vhk0EugcrS2CCvMuCCZrlw1MRI-ahtGYKdNUZQo8,8830
14
14
  fraudcrawler/scraping/zyte.py,sha256=ggI4iYG-E_UyiKgUpEFekeUd1giifEfJ_uyFUSJGSLY,6296
15
15
  fraudcrawler/settings.py,sha256=yAgGvZ9wAdaYbN5c0SBZoTUkjjLOyU2je1109qcbTzQ,723
16
- fraudcrawler-0.3.4.dist-info/LICENSE,sha256=B-3FuHfe3S0fWAlKlceskPcRhzXq81g-rJ-ddUYb4O8,1062
17
- fraudcrawler-0.3.4.dist-info/METADATA,sha256=FslB5x4HVPwRadwg5h83s7dDvJ78lwH4ky62mh1S2cM,5957
18
- fraudcrawler-0.3.4.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
19
- fraudcrawler-0.3.4.dist-info/entry_points.txt,sha256=1Befm7cM6945y2AA1z9V4gZV63mtSWcAs7ypvgux_Xg,79
20
- fraudcrawler-0.3.4.dist-info/RECORD,,
16
+ fraudcrawler-0.3.6.dist-info/LICENSE,sha256=B-3FuHfe3S0fWAlKlceskPcRhzXq81g-rJ-ddUYb4O8,1062
17
+ fraudcrawler-0.3.6.dist-info/METADATA,sha256=jM68uPtxj73vhNjFz5NbzfY40yYGTqGL8JHn0W0oVjk,5965
18
+ fraudcrawler-0.3.6.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
19
+ fraudcrawler-0.3.6.dist-info/entry_points.txt,sha256=1Befm7cM6945y2AA1z9V4gZV63mtSWcAs7ypvgux_Xg,79
20
+ fraudcrawler-0.3.6.dist-info/RECORD,,