fraudcrawler 0.4.7__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.

Potentially problematic release.


This version of fraudcrawler might be problematic. Click here for more details.

@@ -2,12 +2,13 @@ from base64 import b64encode
2
2
  from collections import defaultdict
3
3
  import logging
4
4
  from pydantic import BaseModel
5
- from typing import Dict, List, Iterator
5
+ from typing import Dict, Iterator, List
6
6
 
7
+ import httpx
7
8
  from tenacity import RetryCallState
8
9
 
9
10
  from fraudcrawler.settings import ENRICHMENT_DEFAULT_LIMIT
10
- from fraudcrawler.base.base import Location, Language, AsyncClient
11
+ from fraudcrawler.base.base import Location, Language
11
12
  from fraudcrawler.base.retry import get_async_retry
12
13
 
13
14
 
@@ -21,7 +22,7 @@ class Keyword(BaseModel):
21
22
  volume: int
22
23
 
23
24
 
24
- class Enricher(AsyncClient):
25
+ class Enricher:
25
26
  """A client to interact with the DataForSEO API for enhancing searches (producing alternative search_terms)."""
26
27
 
27
28
  _auth_encoding = "ascii"
@@ -29,13 +30,15 @@ class Enricher(AsyncClient):
29
30
  _suggestions_endpoint = "/v3/dataforseo_labs/google/keyword_suggestions/live"
30
31
  _keywords_endpoint = "/v3/dataforseo_labs/google/related_keywords/live"
31
32
 
32
- def __init__(self, user: str, pwd: str):
33
+ def __init__(self, http_client: httpx.AsyncClient, user: str, pwd: str):
33
34
  """Initializes the DataForSeoApiClient with the given username and password.
34
35
 
35
36
  Args:
37
+ http_client: An httpx.AsyncClient to use for the async requests.
36
38
  user: The username for DataForSEO API.
37
39
  pwd: The password for DataForSEO API.
38
40
  """
41
+ self._http_client = http_client
39
42
  self._user = user
40
43
  self._pwd = pwd
41
44
  auth = f"{user}:{pwd}"
@@ -161,7 +164,9 @@ class Enricher(AsyncClient):
161
164
  }
162
165
  ]
163
166
  url = f"{self._base_endpoint}{self._suggestions_endpoint}"
164
- logger.debug(f'DataForSEO url="{url}" with data="{data}".')
167
+ logger.debug(
168
+ f'DataForSEO search suggested keywords with url="{url}" and data="{data}".'
169
+ )
165
170
 
166
171
  # Perform the request and retry if necessary. There is some context aware logging
167
172
  # - `before`: before the request is made (or before retrying)
@@ -175,10 +180,14 @@ class Enricher(AsyncClient):
175
180
  )
176
181
  async for attempt in retry:
177
182
  with attempt:
178
- sugg_data = await self.post(url=url, headers=self._headers, data=data)
183
+ response = await self._http_client.post(
184
+ url=url, headers=self._headers, json=data
185
+ )
186
+ response.raise_for_status()
179
187
 
180
188
  # Extract the keywords from the response
181
- keywords = self._extract_suggested_keywords(data=sugg_data)
189
+ data_suggested_keywords = response.json()
190
+ keywords = self._extract_suggested_keywords(data=data_suggested_keywords)
182
191
 
183
192
  logger.debug(f"Found {len(keywords)} suggestions from DataForSEO search.")
184
193
  return keywords
@@ -260,28 +269,36 @@ class Enricher(AsyncClient):
260
269
  "limit": limit,
261
270
  }
262
271
  ]
272
+ url = f"{self._base_endpoint}{self._keywords_endpoint}"
263
273
  logger.debug(
264
- f'DataForSEO search for related keywords with search_term="{search_term}".'
274
+ f'DataForSEO search related keywords with url="{url}" and data="{data}".'
265
275
  )
266
- try:
267
- url = f"{self._base_endpoint}{self._keywords_endpoint}"
268
- logger.debug(f'DataForSEO url="{url}" with data="{data}".')
269
- rel_data = await self.post(url=url, headers=self._headers, data=data)
270
- except Exception as e:
271
- logger.error(f"DataForSEO related keyword search failed with error: {e}.")
276
+
277
+ # Perform the request and retry if necessary. There is some context aware logging
278
+ # - `before`: before the request is made (or before retrying)
279
+ # - `before_sleep`: if the request fails before sleeping
280
+ retry = get_async_retry()
281
+ retry.before = lambda retry_state: self._log_before(
282
+ search_term=search_term, retry_state=retry_state
283
+ )
284
+ retry.before_sleep = lambda retry_state: self._log_before_sleep(
285
+ search_term=search_term, retry_state=retry_state
286
+ )
287
+ async for attempt in retry:
288
+ with attempt:
289
+ response = await self._http_client.post(
290
+ url=url, headers=self._headers, json=data
291
+ )
292
+ response.raise_for_status()
272
293
 
273
294
  # Extract the keywords from the response
274
- try:
275
- keywords = self._extract_related_keywords(data=rel_data)
276
- except Exception as e:
277
- logger.error(
278
- f"Failed to extract related keywords from DataForSEO response with error: {e}."
279
- )
295
+ data_related_keywords = response.json()
296
+ keywords = self._extract_related_keywords(data=data_related_keywords)
280
297
 
281
298
  logger.debug(f"Found {len(keywords)} related keywords from DataForSEO search.")
282
299
  return keywords
283
300
 
284
- async def apply(
301
+ async def enrich(
285
302
  self,
286
303
  search_term: str,
287
304
  language: Language,