lacuscore 1.8.7__py3-none-any.whl → 1.8.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lacuscore/lacuscore.py +29 -9
- {lacuscore-1.8.7.dist-info → lacuscore-1.8.9.dist-info}/METADATA +4 -3
- lacuscore-1.8.9.dist-info/RECORD +8 -0
- {lacuscore-1.8.7.dist-info → lacuscore-1.8.9.dist-info}/WHEEL +1 -1
- lacuscore-1.8.7.dist-info/RECORD +0 -8
- {lacuscore-1.8.7.dist-info → lacuscore-1.8.9.dist-info}/LICENSE +0 -0
lacuscore/lacuscore.py
CHANGED
@@ -10,6 +10,7 @@ import os
|
|
10
10
|
import pickle
|
11
11
|
import random
|
12
12
|
import re
|
13
|
+
import sys
|
13
14
|
import time
|
14
15
|
import unicodedata
|
15
16
|
import zlib
|
@@ -28,6 +29,7 @@ from urllib.parse import urlsplit
|
|
28
29
|
|
29
30
|
from dns import resolver
|
30
31
|
from dns.exception import DNSException
|
32
|
+
from dns.exception import Timeout as DNSTimeout
|
31
33
|
|
32
34
|
from defang import refang # type: ignore[import-untyped]
|
33
35
|
from playwrightcapture import Capture, PlaywrightCaptureException
|
@@ -37,6 +39,11 @@ from redis.exceptions import ConnectionError as RedisConnectionError
|
|
37
39
|
from redis.exceptions import DataError
|
38
40
|
from ua_parser import user_agent_parser # type: ignore[import-untyped]
|
39
41
|
|
42
|
+
if sys.version_info < (3, 11):
|
43
|
+
from async_timeout import timeout
|
44
|
+
else:
|
45
|
+
from asyncio import timeout
|
46
|
+
|
40
47
|
BROWSER = Literal['chromium', 'firefox', 'webkit']
|
41
48
|
|
42
49
|
|
@@ -161,7 +168,7 @@ class LacusCore():
|
|
161
168
|
tor_proxy: str | None=None,
|
162
169
|
only_global_lookups: bool=True,
|
163
170
|
max_retries: int=3,
|
164
|
-
loglevel: str='INFO') -> None:
|
171
|
+
loglevel: str | int='INFO') -> None:
|
165
172
|
self.master_logger = logging.getLogger(f'{self.__class__.__name__}')
|
166
173
|
self.master_logger.setLevel(loglevel)
|
167
174
|
self.redis = redis_connector
|
@@ -169,6 +176,10 @@ class LacusCore():
|
|
169
176
|
self.tor_proxy = tor_proxy
|
170
177
|
self.only_global_lookups = only_global_lookups
|
171
178
|
self.max_retries = max_retries
|
179
|
+
self.dnsresolver: resolver.Resolver = resolver.Resolver()
|
180
|
+
self.dnsresolver.cache = resolver.Cache(30)
|
181
|
+
self.dnsresolver.timeout = 2
|
182
|
+
self.dnsresolver.lifetime = 3
|
172
183
|
|
173
184
|
# NOTE: Remove in 1.8.* - clear old ongoing captures queue in case of need
|
174
185
|
if self.redis.type('lacus:ongoing') in ['set', b'set']: # type: ignore[no-untyped-call]
|
@@ -559,6 +570,10 @@ class LacusCore():
|
|
559
570
|
# not an IP, try resolving
|
560
571
|
try:
|
561
572
|
ips_to_check = self.__get_ips(logger, splitted_url.hostname)
|
573
|
+
except DNSTimeout as e:
|
574
|
+
# for a timeout, we do not want to retry, as it is likely to timeout again
|
575
|
+
result = {'error': f'DNS Timeout for "{splitted_url.hostname}": {e}'}
|
576
|
+
raise CaptureError(f'DNS Timeout for "{splitted_url.hostname}": {e}')
|
562
577
|
except Exception as e:
|
563
578
|
result = {'error': f'Issue with hostname resolution ({splitted_url.hostname}): {e}. Full URL: "{url}".'}
|
564
579
|
raise CaptureError(f'Issue with hostname resolution ({splitted_url.hostname}): {e}. Full URL: "{url}".')
|
@@ -594,7 +609,8 @@ class LacusCore():
|
|
594
609
|
browser=browser_engine,
|
595
610
|
device_name=to_capture.get('device_name'),
|
596
611
|
proxy=proxy,
|
597
|
-
general_timeout_in_sec=general_timeout
|
612
|
+
general_timeout_in_sec=general_timeout,
|
613
|
+
loglevel=self.master_logger.getEffectiveLevel()) as capture:
|
598
614
|
# required by Mypy: https://github.com/python/mypy/issues/3004
|
599
615
|
capture.headers = to_capture.get('headers') # type: ignore[assignment]
|
600
616
|
capture.cookies = to_capture.get('cookies') # type: ignore[assignment]
|
@@ -606,18 +622,18 @@ class LacusCore():
|
|
606
622
|
capture.locale = to_capture.get('locale') # type: ignore[assignment]
|
607
623
|
capture.color_scheme = to_capture.get('color_scheme') # type: ignore[assignment]
|
608
624
|
try:
|
609
|
-
|
625
|
+
async with timeout(general_timeout):
|
626
|
+
await capture.initialize_context()
|
610
627
|
except (TimeoutError, asyncio.exceptions.TimeoutError):
|
611
628
|
logger.warning(f'Initializing the context for {url} took longer than the allowed general timeout ({general_timeout}s)')
|
612
629
|
raise RetryCapture(f'Initializing the context for {url} took longer than the allowed general timeout ({general_timeout}s)')
|
613
|
-
|
614
|
-
capture.capture_page(
|
630
|
+
async with timeout(self.max_capture_time):
|
631
|
+
playwright_result = await capture.capture_page(
|
615
632
|
url, referer=to_capture.get('referer'),
|
616
633
|
depth=to_capture.get('depth', 0),
|
617
634
|
rendered_hostname_only=to_capture.get('rendered_hostname_only', True),
|
618
635
|
with_favicon=to_capture.get('with_favicon', False),
|
619
|
-
max_depth_capture_time=self.max_capture_time)
|
620
|
-
timeout=self.max_capture_time)
|
636
|
+
max_depth_capture_time=self.max_capture_time)
|
621
637
|
result = cast(CaptureResponse, playwright_result)
|
622
638
|
if 'error' in result and 'error_name' in result:
|
623
639
|
# generate stats
|
@@ -847,13 +863,17 @@ class LacusCore():
|
|
847
863
|
# It is happening when the error code is NoAnswer
|
848
864
|
resolved_ips = []
|
849
865
|
try:
|
850
|
-
answers_a = resolver.resolve(hostname, 'A')
|
866
|
+
answers_a = self.dnsresolver.resolve(hostname, 'A')
|
851
867
|
resolved_ips += [ip_address(str(answer)) for answer in answers_a]
|
868
|
+
except DNSTimeout as e:
|
869
|
+
raise e
|
852
870
|
except DNSException as e:
|
853
871
|
logger.info(f'No A record for "{hostname}": {e}')
|
854
872
|
try:
|
855
|
-
answers_aaaa = resolver.resolve(hostname, 'AAAA')
|
873
|
+
answers_aaaa = self.dnsresolver.resolve(hostname, 'AAAA')
|
856
874
|
resolved_ips += [ip_address(str(answer)) for answer in answers_aaaa]
|
875
|
+
except DNSTimeout as e:
|
876
|
+
raise e
|
857
877
|
except DNSException as e:
|
858
878
|
logger.info(f'No AAAA record for "{hostname}": {e}')
|
859
879
|
return resolved_ips
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lacuscore
|
3
|
-
Version: 1.8.7
|
3
|
+
Version: 1.8.9
|
4
4
|
Summary: Core of Lacus, usable as a module
|
5
5
|
Home-page: https://github.com/ail-project/LacusCore
|
6
6
|
License: BSD-3-Clause
|
@@ -25,10 +25,11 @@ Classifier: Topic :: Security
|
|
25
25
|
Provides-Extra: docs
|
26
26
|
Requires-Dist: Sphinx (<7.2) ; (python_version < "3.9") and (extra == "docs")
|
27
27
|
Requires-Dist: Sphinx (>=7.2,<8.0) ; (python_version >= "3.9") and (extra == "docs")
|
28
|
+
Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
|
28
29
|
Requires-Dist: defang (>=0.5.3,<0.6.0)
|
29
30
|
Requires-Dist: dnspython (>=2.6.1,<3.0.0)
|
30
|
-
Requires-Dist: playwrightcapture[recaptcha] (>=1.23.
|
31
|
-
Requires-Dist: redis[hiredis] (>=5.0.
|
31
|
+
Requires-Dist: playwrightcapture[recaptcha] (>=1.23.10,<2.0.0)
|
32
|
+
Requires-Dist: redis[hiredis] (>=5.0.3,<6.0.0)
|
32
33
|
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
33
34
|
Requires-Dist: ua-parser (>=0.18.0,<0.19.0)
|
34
35
|
Project-URL: Documentation, https://lacuscore.readthedocs.io/en/latest/
|
@@ -0,0 +1,8 @@
|
|
1
|
+
lacuscore/__init__.py,sha256=10JsUmjLD9xQIsjTDYT5nGowsJVe9C-FHNkaUFuAUtU,321
|
2
|
+
lacuscore/lacus_monitoring.py,sha256=UOfE_1-_rhVeKJXQ_m9XxYkr7VwyQnA6iK-x_tcXJfo,2775
|
3
|
+
lacuscore/lacuscore.py,sha256=iI1Cz9gnhQo8dw7gPII_vCsGHB_wzv-_9cpB3jRDizw,43901
|
4
|
+
lacuscore/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
lacuscore-1.8.9.dist-info/LICENSE,sha256=4C4hLYrIkUD96Ggk-y_Go1Qf7PBZrEm9PSeTGe2nd4s,1516
|
6
|
+
lacuscore-1.8.9.dist-info/METADATA,sha256=L1ITEAiio8MB6NUSn-Yi9tAL6BaYMnz-VVKFg_2IX-A,2629
|
7
|
+
lacuscore-1.8.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
8
|
+
lacuscore-1.8.9.dist-info/RECORD,,
|
lacuscore-1.8.7.dist-info/RECORD
DELETED
@@ -1,8 +0,0 @@
|
|
1
|
-
lacuscore/__init__.py,sha256=10JsUmjLD9xQIsjTDYT5nGowsJVe9C-FHNkaUFuAUtU,321
|
2
|
-
lacuscore/lacus_monitoring.py,sha256=UOfE_1-_rhVeKJXQ_m9XxYkr7VwyQnA6iK-x_tcXJfo,2775
|
3
|
-
lacuscore/lacuscore.py,sha256=R1IBAnasIRcCnx2Qykd7D3v8xr2nSddIvNuf2bcoZGc,42988
|
4
|
-
lacuscore/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
lacuscore-1.8.7.dist-info/LICENSE,sha256=4C4hLYrIkUD96Ggk-y_Go1Qf7PBZrEm9PSeTGe2nd4s,1516
|
6
|
-
lacuscore-1.8.7.dist-info/METADATA,sha256=hVV1r6OfAkZPwWnxsBd-Z329QfkxENhn9Ik22KnAdBM,2556
|
7
|
-
lacuscore-1.8.7.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
8
|
-
lacuscore-1.8.7.dist-info/RECORD,,
|
File without changes
|