lacuscore 1.9.4__py3-none-any.whl → 1.9.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lacuscore/lacuscore.py +43 -25
- {lacuscore-1.9.4.dist-info → lacuscore-1.9.6.dist-info}/METADATA +4 -4
- {lacuscore-1.9.4.dist-info → lacuscore-1.9.6.dist-info}/RECORD +5 -5
- {lacuscore-1.9.4.dist-info → lacuscore-1.9.6.dist-info}/LICENSE +0 -0
- {lacuscore-1.9.4.dist-info → lacuscore-1.9.6.dist-info}/WHEEL +0 -0
lacuscore/lacuscore.py
CHANGED
@@ -531,20 +531,40 @@ class LacusCore():
|
|
531
531
|
browser_engine = 'firefox'
|
532
532
|
else:
|
533
533
|
browser_engine = 'webkit'
|
534
|
+
|
535
|
+
cookies: list[dict[str, Any]] = []
|
536
|
+
if to_capture.get('cookies') and to_capture['cookies'] is not None:
|
537
|
+
# In order to properly pass the cookies to playwright,
|
538
|
+
# each of them must have a name, a value and either a domain + path or a URL
|
539
|
+
# Name and value are mandatory, and we cannot auto-fill them.
|
540
|
+
# If the cookie doesn't have a domain + path OR a URL, we fill the domain
|
541
|
+
# with the hostname of the URL we try to capture and the path with "/"
|
542
|
+
for cookie in to_capture['cookies']:
|
543
|
+
if len(cookie) == 1:
|
544
|
+
# we have a cookie in the format key: value
|
545
|
+
name, value = cookie.popitem()
|
546
|
+
cookie = {'name': name, 'value': value}
|
547
|
+
if 'name' not in cookie or 'value' not in cookie:
|
548
|
+
logger.warning(f'Invalid cookie: {cookie}')
|
549
|
+
continue
|
550
|
+
if 'domain' not in cookie and 'url' not in cookie:
|
551
|
+
cookie['domain'] = splitted_url.hostname
|
552
|
+
cookie['path'] = '/'
|
553
|
+
cookies.append(cookie)
|
554
|
+
|
534
555
|
try:
|
535
556
|
logger.debug(f'Capturing {url}')
|
536
|
-
general_timeout = to_capture.get('general_timeout_in_sec')
|
537
557
|
stats_pipeline.sadd(f'stats:{today}:captures', url)
|
538
558
|
async with Capture(
|
539
559
|
browser=browser_engine,
|
540
560
|
device_name=to_capture.get('device_name'),
|
541
561
|
proxy=proxy,
|
542
|
-
general_timeout_in_sec=
|
562
|
+
general_timeout_in_sec=to_capture.get('general_timeout_in_sec'),
|
543
563
|
loglevel=self.master_logger.getEffectiveLevel(),
|
544
564
|
uuid=uuid) as capture:
|
545
565
|
# required by Mypy: https://github.com/python/mypy/issues/3004
|
546
566
|
capture.headers = to_capture.get('headers') # type: ignore[assignment]
|
547
|
-
capture.cookies =
|
567
|
+
capture.cookies = cookies # type: ignore[assignment]
|
548
568
|
capture.viewport = to_capture.get('viewport') # type: ignore[assignment]
|
549
569
|
capture.user_agent = to_capture.get('user_agent') # type: ignore[assignment]
|
550
570
|
capture.http_credentials = to_capture.get('http_credentials') # type: ignore[assignment]
|
@@ -552,27 +572,30 @@ class LacusCore():
|
|
552
572
|
capture.timezone_id = to_capture.get('timezone_id') # type: ignore[assignment]
|
553
573
|
capture.locale = to_capture.get('locale') # type: ignore[assignment]
|
554
574
|
capture.color_scheme = to_capture.get('color_scheme') # type: ignore[assignment]
|
575
|
+
|
576
|
+
# make sure the initialization doesn't take too long
|
577
|
+
init_timeout = max(self.max_capture_time / 10, 5)
|
555
578
|
try:
|
556
|
-
# make sure the initialization doesn't take too long
|
557
|
-
if general_timeout is None:
|
558
|
-
general_timeout = 5
|
559
|
-
init_timeout = max(general_timeout / 2, 5)
|
560
579
|
async with timeout(init_timeout) as initialize_timeout:
|
561
580
|
await capture.initialize_context()
|
562
|
-
|
563
581
|
except (TimeoutError, asyncio.exceptions.TimeoutError):
|
564
582
|
timeout_expired(initialize_timeout, logger, 'Initializing took too long.')
|
565
|
-
logger.warning(f'Initializing the context for {url} took longer than the allowed
|
566
|
-
raise RetryCapture(f'Initializing the context for {url} took longer than the allowed
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
583
|
+
logger.warning(f'Initializing the context for {url} took longer than the allowed initialization timeout ({init_timeout}s)')
|
584
|
+
raise RetryCapture(f'Initializing the context for {url} took longer than the allowed initialization timeout ({init_timeout}s)')
|
585
|
+
|
586
|
+
try:
|
587
|
+
async with timeout(self.max_capture_time) as capture_timeout:
|
588
|
+
playwright_result = await capture.capture_page(
|
589
|
+
url, referer=to_capture.get('referer'),
|
590
|
+
depth=to_capture.get('depth', 0),
|
591
|
+
rendered_hostname_only=to_capture.get('rendered_hostname_only', True),
|
592
|
+
with_favicon=to_capture.get('with_favicon', False),
|
593
|
+
allow_tracking=to_capture.get('allow_tracking', False),
|
594
|
+
max_depth_capture_time=self.max_capture_time)
|
595
|
+
except (TimeoutError, asyncio.exceptions.TimeoutError):
|
596
|
+
timeout_expired(capture_timeout, logger, 'Capture took too long.')
|
597
|
+
logger.warning(f'The capture of {url} took longer than the allowed max capture time ({self.max_capture_time}s)')
|
598
|
+
raise RetryCapture(f'The capture of {url} took longer than the allowed max capture time ({self.max_capture_time}s)')
|
576
599
|
result = cast(CaptureResponse, playwright_result)
|
577
600
|
if 'error' in result and 'error_name' in result:
|
578
601
|
# generate stats
|
@@ -581,7 +604,7 @@ class LacusCore():
|
|
581
604
|
except RetryCapture as e:
|
582
605
|
raise e
|
583
606
|
except PlaywrightCaptureException as e:
|
584
|
-
logger.
|
607
|
+
logger.warning(f'Invalid parameters for the capture of {url} - {e}')
|
585
608
|
result = {'error': f'Invalid parameters for the capture of {url} - {e}'}
|
586
609
|
raise CaptureError(f'Invalid parameters for the capture of {url} - {e}')
|
587
610
|
except asyncio.CancelledError:
|
@@ -590,11 +613,6 @@ class LacusCore():
|
|
590
613
|
# The capture can be canceled if it has been running for way too long.
|
591
614
|
# We can give it another shot.
|
592
615
|
raise RetryCapture(f'The capture of {url} has been cancelled.')
|
593
|
-
except (TimeoutError, asyncio.exceptions.TimeoutError):
|
594
|
-
timeout_expired(capture_timeout, logger, 'Capture took too long.')
|
595
|
-
logger.warning(f'The capture of {url} took longer than the allowed max capture time ({self.max_capture_time}s)')
|
596
|
-
result = {'error': f'The capture of {url} took longer than the allowed max capture time ({self.max_capture_time}s)'}
|
597
|
-
raise CaptureError(f'The capture of {url} took longer than the allowed max capture time ({self.max_capture_time}s)')
|
598
616
|
except Exception as e:
|
599
617
|
logger.exception(f'Something went poorly {url} - {e}')
|
600
618
|
result = {'error': f'Something went poorly {url} - {e}'}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lacuscore
|
3
|
-
Version: 1.9.4
|
3
|
+
Version: 1.9.6
|
4
4
|
Summary: Core of Lacus, usable as a module
|
5
5
|
Home-page: https://github.com/ail-project/LacusCore
|
6
6
|
License: BSD-3-Clause
|
@@ -28,9 +28,9 @@ Requires-Dist: Sphinx (>=7.2,<8.0) ; (python_version >= "3.9") and (extra == "do
|
|
28
28
|
Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
|
29
29
|
Requires-Dist: defang (>=0.5.3,<0.6.0)
|
30
30
|
Requires-Dist: dnspython (>=2.6.1,<3.0.0)
|
31
|
-
Requires-Dist: playwrightcapture[recaptcha] (>=1.24.
|
32
|
-
Requires-Dist: redis[hiredis] (>=5.0.
|
33
|
-
Requires-Dist: requests (>=2.32.
|
31
|
+
Requires-Dist: playwrightcapture[recaptcha] (>=1.24.11,<2.0.0)
|
32
|
+
Requires-Dist: redis[hiredis] (>=5.0.6,<6.0.0)
|
33
|
+
Requires-Dist: requests (>=2.32.3,<3.0.0)
|
34
34
|
Requires-Dist: ua-parser (>=0.18.0,<0.19.0)
|
35
35
|
Project-URL: Documentation, https://lacuscore.readthedocs.io/en/latest/
|
36
36
|
Project-URL: Repository, https://github.com/ail-project/LacusCore
|
@@ -1,10 +1,10 @@
|
|
1
1
|
lacuscore/__init__.py,sha256=hM4lKoPNybDCUMWdXTVVI1gRk_riLvRZ7IwFbamZLzE,341
|
2
2
|
lacuscore/helpers.py,sha256=lULN7HhY-4a4HG-ybIt4jO3wEGTxkm_jKNqsGpNZo4Y,2711
|
3
3
|
lacuscore/lacus_monitoring.py,sha256=UOfE_1-_rhVeKJXQ_m9XxYkr7VwyQnA6iK-x_tcXJfo,2775
|
4
|
-
lacuscore/lacuscore.py,sha256=
|
4
|
+
lacuscore/lacuscore.py,sha256=pk9E2w2zEH8l6VywXW4Ur_-XAMhFVDWTWIt2bIZ07Bo,44047
|
5
5
|
lacuscore/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
6
|
lacuscore/task_logger.py,sha256=8WbdJdKnGeFCxt9gtCNLI9vAQQZbsy2I5PRQpHP7XFU,1916
|
7
|
-
lacuscore-1.9.
|
8
|
-
lacuscore-1.9.
|
9
|
-
lacuscore-1.9.
|
10
|
-
lacuscore-1.9.
|
7
|
+
lacuscore-1.9.6.dist-info/LICENSE,sha256=4C4hLYrIkUD96Ggk-y_Go1Qf7PBZrEm9PSeTGe2nd4s,1516
|
8
|
+
lacuscore-1.9.6.dist-info/METADATA,sha256=HGVQl2vRm95AmDm9d0z0OU89GX-8ZGUDsiSR8eotx3Y,2629
|
9
|
+
lacuscore-1.9.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
10
|
+
lacuscore-1.9.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|