fraudcrawler-0.4.0-py3-none-any.whl → fraudcrawler-0.4.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

fraudcrawler/__init__.py CHANGED
@@ -2,7 +2,7 @@ from fraudcrawler.scraping.serp import SerpApi, SearchEngine
 from fraudcrawler.scraping.enrich import Enricher
 from fraudcrawler.scraping.zyte import ZyteApi
 from fraudcrawler.processing.processor import Processor
-from fraudcrawler.base.orchestrator import Orchestrator, ProductItem
+from fraudcrawler.base.orchestrator import Orchestrator
 from fraudcrawler.base.client import FraudCrawlerClient
 from fraudcrawler.base.base import (
     Deepness,
@@ -11,6 +11,7 @@ from fraudcrawler.base.base import (
     Language,
     Location,
     Prompt,
+    ProductItem,
 )
 
 __all__ = [
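ProductItem now lives in fraudcrawler.base.base and is re-exported from the package root. A minimal sketch of the caller-facing import, which is unchanged across 0.4.0 → 0.4.2 (assuming the root __all__ still lists ProductItem; the field values below are made up for illustration):

from fraudcrawler import ProductItem

item = ProductItem(
    search_term="Kühlschrank",
    search_term_type="organic",  # hypothetical value, for illustration only
    url="https://example.com/p/1",
    marketplace_name="Example Shop",
    domain="example.com",
)
print(item.filtered)         # False until a pipeline stage filters it
print(item.classifications)  # {} until the Processor stage fills it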
fraudcrawler/base/base.py CHANGED
@@ -2,12 +2,13 @@ import json
 import logging
 from pydantic import (
     BaseModel,
+    Field,
     field_validator,
     model_validator,
 )
 from pydantic_settings import BaseSettings
 import re
-from typing import List
+from typing import List, Dict
 
 import aiohttp
 
@@ -114,12 +115,39 @@ class Deepness(BaseModel):
     enrichment: Enrichment | None = None
 
 
+class ProductItem(BaseModel):
+    """Model representing a product item."""
+
+    # Serp/Enrich parameters
+    search_term: str
+    search_term_type: str
+    url: str
+    marketplace_name: str
+    domain: str
+
+    # Zyte parameters
+    product_name: str | None = None
+    product_price: str | None = None
+    product_description: str | None = None
+    product_images: List[str] | None = None
+    probability: float | None = None
+    html: str | None = None
+    html_clean: str | None = None
+
+    # Processor parameters are set dynamically, so we must allow extra fields
+    classifications: Dict[str, int] = Field(default_factory=dict)
+
+    # Filtering parameters
+    filtered: bool = False
+    filtered_at_stage: str | None = None
+
+
 class Prompt(BaseModel):
     """Model for prompts."""
 
     name: str
-    context: str
     system_prompt: str
+    product_item_fields: List[str]
     allowed_classes: List[int]
 
     @field_validator("allowed_classes", mode="before")
@@ -129,6 +157,17 @@ class Prompt(BaseModel):
             raise ValueError("all values in allowed_classes must be positive integers.")
         return val
 
+    @field_validator("product_item_fields", mode="before")
+    def validate_product_item_fields(cls, val):
+        """Ensure all product_item_fields are valid ProductItem attributes."""
+        valid_fields = set(ProductItem.model_fields.keys())
+        for field in val:
+            if field not in valid_fields:
+                raise ValueError(
+                    f"Invalid product_item_field: '{field}'. Must be one of: {sorted(valid_fields)}"
+                )
+        return val
+
 
 class AsyncClient:
     """Base class for sub-classes using async HTTP requests."""
fraudcrawler/base/client.py CHANGED
@@ -9,8 +9,16 @@ from typing import List
 import pandas as pd
 
 from fraudcrawler.settings import ROOT_DIR
-from fraudcrawler.base.base import Setup, Language, Location, Deepness, Host, Prompt
-from fraudcrawler.base.orchestrator import Orchestrator, ProductItem
+from fraudcrawler.base.base import (
+    Setup,
+    Language,
+    Location,
+    Deepness,
+    Host,
+    Prompt,
+    ProductItem,
+)
+from fraudcrawler.base.orchestrator import Orchestrator
 from fraudcrawler.scraping.serp import SearchEngine
 
 logger = logging.getLogger(__name__)
fraudcrawler/base/orchestrator.py CHANGED
@@ -1,12 +1,13 @@
 from abc import ABC, abstractmethod
 import asyncio
 import logging
-from pydantic import BaseModel, Field
 from typing import Dict, List, Set, cast
+from bs4 import BeautifulSoup
 
 from fraudcrawler.settings import (
     PROCESSOR_DEFAULT_MODEL,
     PROCESSOR_DEFAULT_IF_MISSING,
+    PROCESSOR_PRODUCT_DETAILS_TEMPLATE,
     MAX_RETRIES,
     RETRY_DELAY,
 )
@@ -15,37 +16,19 @@ from fraudcrawler.settings import (
     DEFAULT_N_ZYTE_WKRS,
     DEFAULT_N_PROC_WKRS,
 )
-from fraudcrawler.base.base import Deepness, Host, Language, Location, Prompt
+from fraudcrawler.base.base import (
+    Deepness,
+    Host,
+    Language,
+    Location,
+    Prompt,
+    ProductItem,
+)
 from fraudcrawler import SerpApi, SearchEngine, Enricher, ZyteApi, Processor
 
 logger = logging.getLogger(__name__)
 
 
-class ProductItem(BaseModel):
-    """Model representing a product item."""
-
-    # Serp/Enrich parameters
-    search_term: str
-    search_term_type: str
-    url: str
-    marketplace_name: str
-    domain: str
-
-    # Zyte parameters
-    product_name: str | None = None
-    product_price: str | None = None
-    product_description: str | None = None
-    product_images: List[str] | None = None
-    probability: float | None = None
-
-    # Processor parameters are set dynamically, so we must allow extra fields
-    classifications: Dict[str, int] = Field(default_factory=dict)
-
-    # Filtering parameters
-    filtered: bool = False
-    filtered_at_stage: str | None = None
-
-
 class Orchestrator(ABC):
     """Abstract base class for orchestrating the different actors (crawling, processing).
 
@@ -231,15 +214,16 @@ class Orchestrator(ABC):
                     product.probability = self._zyteapi.extract_probability(
                         details=details
                     )
-
+                    product.html = self._zyteapi.extract_html(details=details)
+                    if product.html:
+                        soup = BeautifulSoup(product.html, "html.parser")
+                        product.html_clean = soup.get_text(separator=" ", strip=True)
                     # Filter the product based on the probability threshold
                     if not self._zyteapi.keep_product(details=details):
                         product.filtered = True
                        product.filtered_at_stage = "Zyte probability threshold"
-
                 except Exception as e:
                     logger.warning(f"Error executing Zyte API search: {e}.")
-
                 await queue_out.put(product)
                 queue_in.task_done()
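Standalone sketch of the new html → html_clean step above (plain beautifulsoup4, no fraudcrawler objects needed; the HTML snippet is made up):

from bs4 import BeautifulSoup

html = "<html><body><h1>Kühlschrank X200</h1><p>CHF 499.- <b>in stock</b></p></body></html>"
soup = BeautifulSoup(html, "html.parser")
print(soup.get_text(separator=" ", strip=True))
# Kühlschrank X200 CHF 499.- in stock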
@@ -269,19 +253,26 @@ class Orchestrator(ABC):
             if not product.filtered:
                 try:
                     url = product.url
-                    name = product.product_name
-                    description = product.product_description
-
                     # Run all the configured prompts
                     for prompt in prompts:
+                        # Dynamically build product_details string
+                        details = []
+                        for field in prompt.product_item_fields:
+                            value = getattr(product, field, None)
+                            if value is not None:
+                                details.append(
+                                    PROCESSOR_PRODUCT_DETAILS_TEMPLATE.format(
+                                        field_name=field, field_value=value
+                                    )
+                                )
+                        product_details = "\n\n".join(details)
                         logger.debug(
-                            f"Classify product {name} with prompt {prompt.name}"
+                            f"Classify product at {url} with prompt {prompt.name} and details: {product_details}"
                         )
                         classification = await self._processor.classify(
                             prompt=prompt,
                             url=url,
-                            name=name,
-                            description=description,
+                            product_details=product_details,
                         )
                         product.classifications[prompt.name] = classification
                 except Exception as e:
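The loop above can be reproduced in isolation; a sketch with a hypothetical product dict standing in for a ProductItem (the template comes from settings.py further down):

from fraudcrawler.settings import PROCESSOR_PRODUCT_DETAILS_TEMPLATE

product = {"product_name": "Kühlschrank X200", "html_clean": "Kühlschrank X200 CHF 499.- in stock"}
fields = ["product_name", "html_clean", "product_price"]  # product_price is absent -> skipped

details = [
    PROCESSOR_PRODUCT_DETAILS_TEMPLATE.format(field_name=f, field_value=product[f])
    for f in fields
    if product.get(f) is not None
]
print("\n\n".join(details))
# product_name:
# Kühlschrank X200
#
# html_clean:
# Kühlschrank X200 CHF 499.- in stock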
fraudcrawler/launch_demo_pipeline.py CHANGED
@@ -16,38 +16,39 @@ def main():
     search_term = "Kühlschrank"
     language = Language(name="German")
     location = Location(name="Switzerland")
-    deepness = Deepness(num_results=20)
+    deepness = Deepness(num_results=10)
     prompts = [
         Prompt(
-            name="relevance",
-            context="This organization is interested in checking the energy efficiency of certain devices.",
+            name="availability",
             system_prompt=(
-                "You are a helpful and intelligent assistant. Your task is to classify any given product "
-                "as either relevant (1) or not relevant (0), strictly based on the context and product details provided by the user. "
+                "You are a helpful and intelligent assistant helping an organization that is interested in checking the availability of certain products. "
+                "Your task is to classify any given product as either available (1) or not available (0), strictly based on the context and product details provided by the user. "
                 "You must consider all aspects of the given context and make a binary decision accordingly. "
-                "If the product aligns with the user's needs, classify it as 1 (relevant); otherwise, classify it as 0 (not relevant). "
+                "If the product can be purchased, added to a shopping basket, delivered, or is listed as available in any form, classify it as 1 (available); "
+                "if there is any mention of out of stock, not available, no longer shippable, or similar, classify it as 0 (not available). "
                 "Respond only with the number 1 or 0."
             ),
+            product_item_fields=["product_name", "html_clean"],
             allowed_classes=[0, 1],
         ),
-        Prompt(
-            name="seriousness",
-            context="This organization is interested in checking the energy efficiency of certain devices.",
-            system_prompt=(
-                "You are an intelligent and discerning assistant. Your task is to classify each item as either "
-                "a product for sale (1) or not a product for sale (0). To make this distinction, consider the following criteria: \n"
-                " 1 Product for Sale (1): Classify as 1 if the result clearly indicates an item available for purchase, typically found "
-                "within an online shop or marketplace.\n"
-                " 2 Not a Product for Sale (0): Classify as 0 if the result is unrelated to a direct purchase of a product. This includes items such as: \n"
-                " - Books and Videos: These may be available for sale, but if they are about or related to the searched product rather than being the "
-                "exact product itself, classify as 0.\n"
-                " - Advertisements: Promotional content that doesn't directly sell a product.\n"
-                " - Companies and Services: Names and descriptions of companies or services related to the product but not the product itself.\n"
-                " - Related Topics/Content: Any text or media that discusses or elaborates on the topic without offering a tangible product for sale.\n"
-                "Make your decision based solely on the context and details provided in the search result. Respond only with the number 1 or 0."
-            ),
-            allowed_classes=[0, 1],
-        ),
+        # Prompt(
+        #     name="seriousness",
+        #     system_prompt=(
+        #         "You are a helpful and intelligent assistant helping an organization that is interested in checking the energy efficiency of certain devices. "
+        #         "Your task is to classify each item as either a product for sale (1) or not a product for sale (0). To make this distinction, consider the following criteria: \n"
+        #         " 1 Product for Sale (1): Classify as 1 if the result clearly indicates an item available for purchase, typically found "
+        #         "within an online shop or marketplace.\n"
+        #         " 2 Not a Product for Sale (0): Classify as 0 if the result is unrelated to a direct purchase of a product. This includes items such as: \n"
+        #         " - Books and Videos: These may be available for sale, but if they are about or related to the searched product rather than being the "
+        #         "exact product itself, classify as 0.\n"
+        #         " - Advertisements: Promotional content that doesn't directly sell a product.\n"
+        #         " - Companies and Services: Names and descriptions of companies or services related to the product but not the product itself.\n"
+        #         " - Related Topics/Content: Any text or media that discusses or elaborates on the topic without offering a tangible product for sale.\n"
+        #         "Make your decision based solely on the context and details provided in the search result. Respond only with the number 1 or 0."
+        #     ),
+        #     product_item_fields=["product_name", "product_description"],
+        #     allowed_classes=[0, 1],
+        # ),
     ]
     # # Optional: Add term ENRICHMENT
     # from fraudcrawler import Enrichment
fraudcrawler/processing/processor.py CHANGED
@@ -52,42 +52,34 @@ class Processor:
             raise ValueError("Empty response from OpenAI API")
         return content
 
-    async def classify(
-        self, prompt: Prompt, url: str, name: str | None, description: str | None
-    ) -> int:
-        """A generic classification method that classified a product based on a prompt object.
+    async def classify(self, prompt: Prompt, url: str, product_details: str) -> int:
+        """A generic classification method that classifies a product based on a prompt object.
 
         Args:
-            prompt: A dictionary with keys "system_prompt", "user_prompt", etc.
+            prompt: A dictionary with keys "system_prompt", etc.
             url: Product URL (often used in the user_prompt).
-            name: Product name (often used in the user_prompt).
-            description: Product description (often used in the user_prompt).
+            product_details: String with product details, formatted per prompt.product_item_fields.
 
         Note:
             This method returns `PROCESSOR_DEFAULT_IF_MISSING` if:
-            - 'name' or 'description' is None
+            - product_details is empty
             - an error occurs during the API call
             - if the response isn't in allowed_classes.
         """
         # If required fields are missing, return the prompt's default fallback if provided.
-        if name is None or description is None:
-            logger.warning(
-                f"Missing required fields for classification: name='{name}', description='{description}'"
-            )
+        if not product_details:
+            logger.warning("Missing required product_details for classification.")
             return self._default_if_missing
 
         # Substitute placeholders in user_prompt with the relevant arguments
         user_prompt = PROCESSOR_USER_PROMPT_TEMPLATE.format(
-            context=prompt.context,
-            url=url,
-            name=name,
-            description=description,
+            product_details=product_details,
         )
 
         # Call the OpenAI API
         try:
             logger.debug(
-                f'Calling OpenAI API for classification (name="{name}", prompt="{prompt.name}")'
+                f'Calling OpenAI API for classification (url="{url}", prompt="{prompt.name}")'
             )
             content = await self._call_openai_api(
                 system_prompt=prompt.system_prompt,
@@ -104,12 +96,12 @@ class Processor:
                 return self._default_if_missing
 
             logger.info(
-                f'Classification for "{name}" (prompt={prompt.name}): {classification}'
+                f'Classification for url="{url}" (prompt={prompt.name}): {classification}'
             )
             return classification
 
         except Exception as e:
             logger.error(
-                f'Error classifying product "{name}" with prompt "{prompt.name}": {e}'
+                f'Error classifying product at url="{url}" with prompt "{prompt.name}": {e}'
             )
             return self._default_if_missing
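How a caller drives the new classify signature; a hedged sketch assuming a configured Processor instance and the "availability" Prompt from the demo pipeline (neither is constructed here):

import asyncio

async def classify_one(processor, prompt):
    # Returns 0 or 1 here, or PROCESSOR_DEFAULT_IF_MISSING (-1) when
    # product_details is empty, the API call fails, or the answer is
    # outside prompt.allowed_classes.
    return await processor.classify(
        prompt=prompt,
        url="https://example.com/p/1",
        product_details="product_name:\nKühlschrank X200",
    )

# asyncio.run(classify_one(processor, prompt))  # with real instances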
fraudcrawler/scraping/serp.py CHANGED
@@ -14,6 +14,7 @@ logger = logging.getLogger(__name__)
 
 class SerpResult(BaseModel):
     """Model for a single search result from SerpApi."""
+
     url: str
     domain: str
     marketplace_name: str
@@ -23,6 +24,7 @@ class SerpResult(BaseModel):
 
 class SearchEngine(Enum):
     """Enum for the supported search engines."""
+
     GOOGLE = "google"
     GOOGLE_SHOPPING = "google_shopping"
 
@@ -33,7 +35,7 @@ class SerpApi(AsyncClient):
     _endpoint = "https://serpapi.com/search"
     _engine_marketplace_names = {
         SearchEngine.GOOGLE.value: "Google",
-        SearchEngine.GOOGLE_SHOPPING.value: "Google Shopping"
+        SearchEngine.GOOGLE_SHOPPING.value: "Google Shopping",
     }
     _hostname_pattern = r"^(?:https?:\/\/)?([^\/:?#]+)"
fraudcrawler/scraping/zyte.py CHANGED
@@ -1,6 +1,7 @@
 import asyncio
 import logging
 from typing import List
+from base64 import b64decode
 
 import aiohttp
 
@@ -68,7 +69,8 @@ class ZyteApi(AsyncClient):
             "metadata": {
                 "probability": float,
             },
-        }
+        },
+        "httpResponseBody": base64
     }
     """
     logger.info(f"Fetching product details by Zyte for URL {url}.")
@@ -192,3 +194,24 @@ class ZyteApi(AsyncClient):
         }
         """
         return float(details.get("product", {}).get("metadata", {}).get("probability"))
+
+    @staticmethod
+    def extract_html(details: dict) -> str | None:
+        """Extracts the HTML from the Zyte API response.
+
+        The input argument is a dictionary of the following structure:
+        {
+            "httpResponseBody": base64
+        }
+        """
+
+        # Get the Base64-encoded content
+        encoded = details.get("httpResponseBody")
+
+        # Decode it into bytes
+        if isinstance(encoded, str):
+            decoded_bytes = b64decode(encoded)
+
+            # Convert bytes to string (assuming UTF-8 encoding)
+            decoded_string = decoded_bytes.decode("utf-8")
+            return decoded_string
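A round-trip sketch of what extract_html does with "httpResponseBody" (standard-library base64 only; the HTML payload is made up):

from base64 import b64decode, b64encode

details = {"httpResponseBody": b64encode(b"<html><body>CHF 499.-</body></html>").decode("ascii")}

encoded = details.get("httpResponseBody")
if isinstance(encoded, str):
    print(b64decode(encoded).decode("utf-8"))
# <html><body>CHF 499.-</body></html>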
fraudcrawler/settings.py CHANGED
@@ -22,9 +22,8 @@ ZYTE_DEFALUT_PROBABILITY_THRESHOLD = 0.1
 # Processor settings
 PROCESSOR_DEFAULT_MODEL = "gpt-4o"
 PROCESSOR_DEFAULT_IF_MISSING = -1
-PROCESSOR_USER_PROMPT_TEMPLATE = (
-    "Context: {context}\n\nProduct Details: {name}\n{description}\\n\nRelevance:"
-)
+PROCESSOR_USER_PROMPT_TEMPLATE = "Product Details:\n{product_details}\n\nRelevance:"
+PROCESSOR_PRODUCT_DETAILS_TEMPLATE = "{field_name}:\n{field_value}"
 
 # Async settings
 DEFAULT_N_SERP_WKRS = 10
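The two templates compose as follows; a sketch showing the final user prompt the Processor sends (values are illustrative):

PROCESSOR_USER_PROMPT_TEMPLATE = "Product Details:\n{product_details}\n\nRelevance:"
PROCESSOR_PRODUCT_DETAILS_TEMPLATE = "{field_name}:\n{field_value}"

detail = PROCESSOR_PRODUCT_DETAILS_TEMPLATE.format(
    field_name="product_name", field_value="Kühlschrank X200"
)
print(PROCESSOR_USER_PROMPT_TEMPLATE.format(product_details=detail))
# Product Details:
# product_name:
# Kühlschrank X200
#
# Relevance: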
fraudcrawler-0.4.0.dist-info/METADATA → fraudcrawler-0.4.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: fraudcrawler
-Version: 0.4.0
+Version: 0.4.2
 Summary: Intelligent Market Monitoring
 Home-page: https://github.com/open-veanu/fraudcrawler
 License: MIT
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: aiohttp (>=3.11.14,<4.0.0)
+Requires-Dist: beautifulsoup4 (>=4.13.4,<5.0.0)
 Requires-Dist: openai (>=1.68.2,<2.0.0)
 Requires-Dist: pandas (>=2.2.3,<3.0.0)
 Requires-Dist: pydantic-settings (>=2.8.1,<3.0.0)
@@ -80,7 +81,6 @@ deepness = Deepness(num_results=50)
 prompts = [
     Prompt(
         name="relevance",
-        context="This organization is interested in medical products and drugs.",
         system_prompt=(
             "You are a helpful and intelligent assistant. Your task is to classify any given product "
             "as either relevant (1) or not relevant (0), strictly based on the context and product details provided by the user. "
fraudcrawler-0.4.2.dist-info/RECORD ADDED
@@ -0,0 +1,20 @@
+fraudcrawler/__init__.py,sha256=yXFdQzlSLUZV4Oh0wkzghvPlICQO5TnpEtIHZaTay_c,717
+fraudcrawler/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+fraudcrawler/base/base.py,sha256=JWjZ3mpX4caQAsWKYqtHrUqHfHr6GXlAaEjxxHV9ODQ,6020
+fraudcrawler/base/client.py,sha256=FibiYycjUys-c4sv66Y2JqJu5y15be2MYd2_9yB3wG8,4936
+fraudcrawler/base/google-languages.json,sha256=z0VtjMCsCcZq11OkCIb9jMDD1p9Ty4lhV7bq4ddYvec,10748
+fraudcrawler/base/google-locations.json,sha256=UtNu0iSStllvFRTQXMobWKmZR1hKmtgtHftLNgaJTT0,9204345
+fraudcrawler/base/orchestrator.py,sha256=p1gRtj3jVaFmtwPSKruiOixu3QDuSiHjPKFi0KKsgPk,24591
+fraudcrawler/launch_demo_pipeline.py,sha256=zQxKAekJ56iKQ5-NeM0UMS-1Wd3ui0bpeqkH1nM9A4A,4628
+fraudcrawler/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+fraudcrawler/processing/processor.py,sha256=An2orst0YRIav7bFuoDMgjwWz2Z9dyjVUbkNAMXNTTo,3748
+fraudcrawler/scraping/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+fraudcrawler/scraping/enrich.py,sha256=X1BBZshdZqPmbduzhGwH0ULSzq03L_7bf7_UL8yOQ9E,10608
+fraudcrawler/scraping/serp.py,sha256=ix2kCs9Xo694K8vjDL104MDb2Cun1AXfStxMaR-2u7U,17865
+fraudcrawler/scraping/zyte.py,sha256=DUF5pIwpZyQw30qURnFxtp8KYpUgBkrXjM7RaVGH92Q,7005
+fraudcrawler/settings.py,sha256=z63Lc8LnmfG7u0F7CVlGOXMMpr7LtJC0BzXDoA8rN7Q,839
+fraudcrawler-0.4.2.dist-info/LICENSE,sha256=B-3FuHfe3S0fWAlKlceskPcRhzXq81g-rJ-ddUYb4O8,1062
+fraudcrawler-0.4.2.dist-info/METADATA,sha256=M1xMdweLHpSbfEceT_5GpcDiLdDHpOHpzQ5w-ZNF4gQ,5931
+fraudcrawler-0.4.2.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
+fraudcrawler-0.4.2.dist-info/entry_points.txt,sha256=1Befm7cM6945y2AA1z9V4gZV63mtSWcAs7ypvgux_Xg,79
+fraudcrawler-0.4.2.dist-info/RECORD,,
fraudcrawler-0.4.0.dist-info/RECORD REMOVED
@@ -1,20 +0,0 @@
-fraudcrawler/__init__.py,sha256=o_K3jVqH-0Pfa08DxySUyHfrwAzHNf-fWbgV5v66oKA,713
-fraudcrawler/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fraudcrawler/base/base.py,sha256=woesbPztEh7tbD0ty9S37JbFrbEC-01H9etmCT2ffnc,4771
-fraudcrawler/base/client.py,sha256=ONy1jf2qwQey4ix4Wdn_qJIik-8NUZHQpuQZyKIVf5I,4903
-fraudcrawler/base/google-languages.json,sha256=z0VtjMCsCcZq11OkCIb9jMDD1p9Ty4lhV7bq4ddYvec,10748
-fraudcrawler/base/google-locations.json,sha256=UtNu0iSStllvFRTQXMobWKmZR1hKmtgtHftLNgaJTT0,9204345
-fraudcrawler/base/orchestrator.py,sha256=VNM8QBT7nZ3BUzkL5pXKNmQxM_FY12UOfA7dnKKfo9U,24395
-fraudcrawler/launch_demo_pipeline.py,sha256=RIZTtdtZeJPhvSLp1IUjT_nhme_2q6mAGWKoL838E4E,4320
-fraudcrawler/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fraudcrawler/processing/processor.py,sha256=IFVKIiNi0QoCAgPFkFtNDgxfhh01iDNUyIBZWACplR8,3993
-fraudcrawler/scraping/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fraudcrawler/scraping/enrich.py,sha256=X1BBZshdZqPmbduzhGwH0ULSzq03L_7bf7_UL8yOQ9E,10608
-fraudcrawler/scraping/serp.py,sha256=baXcTcquSXpK_JvtHt0_J1CQ20yMHx7I7oF1_LtMpqE,17862
-fraudcrawler/scraping/zyte.py,sha256=ggI4iYG-E_UyiKgUpEFekeUd1giifEfJ_uyFUSJGSLY,6296
-fraudcrawler/settings.py,sha256=uMjWyDS-TDZBGUK0kiMVzc7TiYhuEav_GFY3A4XFcvo,805
-fraudcrawler-0.4.0.dist-info/LICENSE,sha256=B-3FuHfe3S0fWAlKlceskPcRhzXq81g-rJ-ddUYb4O8,1062
-fraudcrawler-0.4.0.dist-info/METADATA,sha256=uNYbjX1vPPH--hOSI9gVIkgSGhFkYV7SQ_zWR9Nc1Ng,5965
-fraudcrawler-0.4.0.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
-fraudcrawler-0.4.0.dist-info/entry_points.txt,sha256=1Befm7cM6945y2AA1z9V4gZV63mtSWcAs7ypvgux_Xg,79
-fraudcrawler-0.4.0.dist-info/RECORD,,