lacuscore 1.8.7__py3-none-any.whl → 1.8.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lacuscore/lacuscore.py CHANGED
@@ -10,6 +10,7 @@ import os
10
10
  import pickle
11
11
  import random
12
12
  import re
13
+ import sys
13
14
  import time
14
15
  import unicodedata
15
16
  import zlib
@@ -28,6 +29,7 @@ from urllib.parse import urlsplit
28
29
 
29
30
  from dns import resolver
30
31
  from dns.exception import DNSException
32
+ from dns.exception import Timeout as DNSTimeout
31
33
 
32
34
  from defang import refang # type: ignore[import-untyped]
33
35
  from playwrightcapture import Capture, PlaywrightCaptureException
@@ -37,6 +39,11 @@ from redis.exceptions import ConnectionError as RedisConnectionError
37
39
  from redis.exceptions import DataError
38
40
  from ua_parser import user_agent_parser # type: ignore[import-untyped]
39
41
 
42
+ if sys.version_info < (3, 11):
43
+ from async_timeout import timeout
44
+ else:
45
+ from asyncio import timeout
46
+
40
47
  BROWSER = Literal['chromium', 'firefox', 'webkit']
41
48
 
42
49
 
@@ -161,7 +168,7 @@ class LacusCore():
161
168
  tor_proxy: str | None=None,
162
169
  only_global_lookups: bool=True,
163
170
  max_retries: int=3,
164
- loglevel: str='INFO') -> None:
171
+ loglevel: str | int='INFO') -> None:
165
172
  self.master_logger = logging.getLogger(f'{self.__class__.__name__}')
166
173
  self.master_logger.setLevel(loglevel)
167
174
  self.redis = redis_connector
@@ -169,6 +176,10 @@ class LacusCore():
169
176
  self.tor_proxy = tor_proxy
170
177
  self.only_global_lookups = only_global_lookups
171
178
  self.max_retries = max_retries
179
+ self.dnsresolver: resolver.Resolver = resolver.Resolver()
180
+ self.dnsresolver.cache = resolver.Cache(30)
181
+ self.dnsresolver.timeout = 2
182
+ self.dnsresolver.lifetime = 3
172
183
 
173
184
  # NOTE: Remove in 1.8.* - clear old ongoing captures queue in case of need
174
185
  if self.redis.type('lacus:ongoing') in ['set', b'set']: # type: ignore[no-untyped-call]
@@ -559,6 +570,10 @@ class LacusCore():
559
570
  # not an IP, try resolving
560
571
  try:
561
572
  ips_to_check = self.__get_ips(logger, splitted_url.hostname)
573
+ except DNSTimeout as e:
574
+ # for a timeout, we do not want to retry, as it is likely to timeout again
575
+ result = {'error': f'DNS Timeout for "{splitted_url.hostname}": {e}'}
576
+ raise CaptureError(f'DNS Timeout for "{splitted_url.hostname}": {e}')
562
577
  except Exception as e:
563
578
  result = {'error': f'Issue with hostname resolution ({splitted_url.hostname}): {e}. Full URL: "{url}".'}
564
579
  raise CaptureError(f'Issue with hostname resolution ({splitted_url.hostname}): {e}. Full URL: "{url}".')
@@ -594,7 +609,8 @@ class LacusCore():
594
609
  browser=browser_engine,
595
610
  device_name=to_capture.get('device_name'),
596
611
  proxy=proxy,
597
- general_timeout_in_sec=general_timeout) as capture:
612
+ general_timeout_in_sec=general_timeout,
613
+ loglevel=self.master_logger.getEffectiveLevel()) as capture:
598
614
  # required by Mypy: https://github.com/python/mypy/issues/3004
599
615
  capture.headers = to_capture.get('headers') # type: ignore[assignment]
600
616
  capture.cookies = to_capture.get('cookies') # type: ignore[assignment]
@@ -606,18 +622,18 @@ class LacusCore():
606
622
  capture.locale = to_capture.get('locale') # type: ignore[assignment]
607
623
  capture.color_scheme = to_capture.get('color_scheme') # type: ignore[assignment]
608
624
  try:
609
- await asyncio.wait_for(capture.initialize_context(), timeout=general_timeout)
625
+ async with timeout(general_timeout):
626
+ await capture.initialize_context()
610
627
  except (TimeoutError, asyncio.exceptions.TimeoutError):
611
628
  logger.warning(f'Initializing the context for {url} took longer than the allowed general timeout ({general_timeout}s)')
612
629
  raise RetryCapture(f'Initializing the context for {url} took longer than the allowed general timeout ({general_timeout}s)')
613
- playwright_result = await asyncio.wait_for(
614
- capture.capture_page(
630
+ async with timeout(self.max_capture_time):
631
+ playwright_result = await capture.capture_page(
615
632
  url, referer=to_capture.get('referer'),
616
633
  depth=to_capture.get('depth', 0),
617
634
  rendered_hostname_only=to_capture.get('rendered_hostname_only', True),
618
635
  with_favicon=to_capture.get('with_favicon', False),
619
- max_depth_capture_time=self.max_capture_time),
620
- timeout=self.max_capture_time)
636
+ max_depth_capture_time=self.max_capture_time)
621
637
  result = cast(CaptureResponse, playwright_result)
622
638
  if 'error' in result and 'error_name' in result:
623
639
  # generate stats
@@ -847,13 +863,17 @@ class LacusCore():
847
863
  # It is happening when the error code is NoAnswer
848
864
  resolved_ips = []
849
865
  try:
850
- answers_a = resolver.resolve(hostname, 'A')
866
+ answers_a = self.dnsresolver.resolve(hostname, 'A')
851
867
  resolved_ips += [ip_address(str(answer)) for answer in answers_a]
868
+ except DNSTimeout as e:
869
+ raise e
852
870
  except DNSException as e:
853
871
  logger.info(f'No A record for "{hostname}": {e}')
854
872
  try:
855
- answers_aaaa = resolver.resolve(hostname, 'AAAA')
873
+ answers_aaaa = self.dnsresolver.resolve(hostname, 'AAAA')
856
874
  resolved_ips += [ip_address(str(answer)) for answer in answers_aaaa]
875
+ except DNSTimeout as e:
876
+ raise e
857
877
  except DNSException as e:
858
878
  logger.info(f'No AAAA record for "{hostname}": {e}')
859
879
  return resolved_ips
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lacuscore
3
- Version: 1.8.7
3
+ Version: 1.8.9
4
4
  Summary: Core of Lacus, usable as a module
5
5
  Home-page: https://github.com/ail-project/LacusCore
6
6
  License: BSD-3-Clause
@@ -25,10 +25,11 @@ Classifier: Topic :: Security
25
25
  Provides-Extra: docs
26
26
  Requires-Dist: Sphinx (<7.2) ; (python_version < "3.9") and (extra == "docs")
27
27
  Requires-Dist: Sphinx (>=7.2,<8.0) ; (python_version >= "3.9") and (extra == "docs")
28
+ Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
28
29
  Requires-Dist: defang (>=0.5.3,<0.6.0)
29
30
  Requires-Dist: dnspython (>=2.6.1,<3.0.0)
30
- Requires-Dist: playwrightcapture[recaptcha] (>=1.23.8,<2.0.0)
31
- Requires-Dist: redis[hiredis] (>=5.0.1,<6.0.0)
31
+ Requires-Dist: playwrightcapture[recaptcha] (>=1.23.10,<2.0.0)
32
+ Requires-Dist: redis[hiredis] (>=5.0.3,<6.0.0)
32
33
  Requires-Dist: requests (>=2.31.0,<3.0.0)
33
34
  Requires-Dist: ua-parser (>=0.18.0,<0.19.0)
34
35
  Project-URL: Documentation, https://lacuscore.readthedocs.io/en/latest/
@@ -0,0 +1,8 @@
1
+ lacuscore/__init__.py,sha256=10JsUmjLD9xQIsjTDYT5nGowsJVe9C-FHNkaUFuAUtU,321
2
+ lacuscore/lacus_monitoring.py,sha256=UOfE_1-_rhVeKJXQ_m9XxYkr7VwyQnA6iK-x_tcXJfo,2775
3
+ lacuscore/lacuscore.py,sha256=iI1Cz9gnhQo8dw7gPII_vCsGHB_wzv-_9cpB3jRDizw,43901
4
+ lacuscore/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ lacuscore-1.8.9.dist-info/LICENSE,sha256=4C4hLYrIkUD96Ggk-y_Go1Qf7PBZrEm9PSeTGe2nd4s,1516
6
+ lacuscore-1.8.9.dist-info/METADATA,sha256=L1ITEAiio8MB6NUSn-Yi9tAL6BaYMnz-VVKFg_2IX-A,2629
7
+ lacuscore-1.8.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
8
+ lacuscore-1.8.9.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.8.1
2
+ Generator: poetry-core 1.9.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,8 +0,0 @@
1
- lacuscore/__init__.py,sha256=10JsUmjLD9xQIsjTDYT5nGowsJVe9C-FHNkaUFuAUtU,321
2
- lacuscore/lacus_monitoring.py,sha256=UOfE_1-_rhVeKJXQ_m9XxYkr7VwyQnA6iK-x_tcXJfo,2775
3
- lacuscore/lacuscore.py,sha256=R1IBAnasIRcCnx2Qykd7D3v8xr2nSddIvNuf2bcoZGc,42988
4
- lacuscore/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- lacuscore-1.8.7.dist-info/LICENSE,sha256=4C4hLYrIkUD96Ggk-y_Go1Qf7PBZrEm9PSeTGe2nd4s,1516
6
- lacuscore-1.8.7.dist-info/METADATA,sha256=hVV1r6OfAkZPwWnxsBd-Z329QfkxENhn9Ik22KnAdBM,2556
7
- lacuscore-1.8.7.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
8
- lacuscore-1.8.7.dist-info/RECORD,,