lacuscore 1.17.2__py3-none-any.whl → 1.17.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lacuscore/lacuscore.py +29 -4
- {lacuscore-1.17.2.dist-info → lacuscore-1.17.3.dist-info}/METADATA +1 -1
- {lacuscore-1.17.2.dist-info → lacuscore-1.17.3.dist-info}/RECORD +5 -5
- {lacuscore-1.17.2.dist-info → lacuscore-1.17.3.dist-info}/LICENSE +0 -0
- {lacuscore-1.17.2.dist-info → lacuscore-1.17.3.dist-info}/WHEEL +0 -0
lacuscore/lacuscore.py
CHANGED
@@ -9,6 +9,7 @@ import os
|
|
9
9
|
import pickle
|
10
10
|
import random
|
11
11
|
import re
|
12
|
+
import socket
|
12
13
|
import sys
|
13
14
|
import time
|
14
15
|
import unicodedata
|
@@ -38,6 +39,7 @@ from redis.exceptions import DataError
|
|
38
39
|
|
39
40
|
from . import task_logger
|
40
41
|
from .helpers import (
|
42
|
+
LacusCoreException,
|
41
43
|
LacusCoreLogAdapter, CaptureError, RetryCapture, CaptureSettingsError,
|
42
44
|
CaptureStatus, CaptureResponse, CaptureResponseJson, CaptureSettings)
|
43
45
|
|
@@ -80,6 +82,19 @@ def _secure_filename(filename: str) -> str:
|
|
80
82
|
return filename
|
81
83
|
|
82
84
|
|
85
|
+
def _check_proxy_port_open(proxy: dict[str, str] | str) -> bool:
|
86
|
+
if isinstance(proxy, dict):
|
87
|
+
to_check = proxy['server']
|
88
|
+
else:
|
89
|
+
to_check = proxy
|
90
|
+
splitted_proxy_url = urlsplit(to_check)
|
91
|
+
if not splitted_proxy_url.hostname or not splitted_proxy_url.port:
|
92
|
+
raise LacusCoreException('Invalid pre-defined proxy (needs hostname and port): {proxy}')
|
93
|
+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
94
|
+
s.settimeout(3)
|
95
|
+
return s.connect_ex((splitted_proxy_url.hostname, splitted_proxy_url.port)) == 0
|
96
|
+
|
97
|
+
|
83
98
|
class LacusCore():
|
84
99
|
"""Capture URLs or web enabled documents using PlaywrightCapture.
|
85
100
|
|
@@ -95,8 +110,8 @@ class LacusCore():
|
|
95
110
|
def __init__(self, redis_connector: Redis[bytes], /, *,
|
96
111
|
max_capture_time: int=3600,
|
97
112
|
expire_results: int=36000,
|
98
|
-
tor_proxy: str | None=None,
|
99
|
-
i2p_proxy: str | None=None,
|
113
|
+
tor_proxy: dict[str, str] | str | None=None,
|
114
|
+
i2p_proxy: dict[str, str] | str | None=None,
|
100
115
|
only_global_lookups: bool=True,
|
101
116
|
max_retries: int=3,
|
102
117
|
headed_allowed: bool=False,
|
@@ -107,8 +122,10 @@ class LacusCore():
|
|
107
122
|
self.redis = redis_connector
|
108
123
|
self.max_capture_time = max_capture_time
|
109
124
|
self.expire_results = expire_results
|
125
|
+
|
110
126
|
self.tor_proxy = tor_proxy
|
111
127
|
self.i2p_proxy = i2p_proxy
|
128
|
+
|
112
129
|
self.only_global_lookups = only_global_lookups
|
113
130
|
self.max_retries = max_retries
|
114
131
|
self.headed_allowed = headed_allowed
|
@@ -453,18 +470,26 @@ class LacusCore():
|
|
453
470
|
proxy = to_capture.proxy
|
454
471
|
if self.tor_proxy:
|
455
472
|
# check if onion or forced
|
456
|
-
if (proxy == 'force_tor' # if the proxy is set to "force_tor", we use the pre-configured tor proxy, regardless the URL.
|
473
|
+
if (proxy == 'force_tor' # if the proxy is set to "force_tor", we use the pre-configured tor proxy, regardless the URL, legacy feature.
|
457
474
|
or (not proxy # if the TLD is "onion", we use the pre-configured tor proxy
|
458
475
|
and splitted_url.netloc
|
459
476
|
and splitted_url.hostname
|
460
477
|
and splitted_url.hostname.split('.')[-1] == 'onion')):
|
478
|
+
if not _check_proxy_port_open(self.tor_proxy):
|
479
|
+
logger.critical(f'Unable to connect to the default tor proxy: {self.tor_proxy}')
|
480
|
+
raise CaptureError('The selected tor proxy is unreachable, unable to run the capture.')
|
461
481
|
proxy = self.tor_proxy
|
462
|
-
|
482
|
+
logger.info('Using the default tor proxy.')
|
483
|
+
if self.i2p_proxy:
|
463
484
|
if (not proxy # if the TLD is "i2p", we use the pre-configured I2P proxy
|
464
485
|
and splitted_url.netloc
|
465
486
|
and splitted_url.hostname
|
466
487
|
and splitted_url.hostname.split('.')[-1] == 'i2p'):
|
488
|
+
if not _check_proxy_port_open(self.i2p_proxy):
|
489
|
+
logger.critical(f'Unable to connect to the default tor proxy: {self.i2p_proxy}')
|
490
|
+
raise CaptureError('The selected I2P proxy is unreachable, unable to run the capture.')
|
467
491
|
proxy = self.i2p_proxy
|
492
|
+
logger.info('Using the default I2P proxy.')
|
468
493
|
|
469
494
|
if self.only_global_lookups and not proxy and splitted_url.scheme not in ['data', 'file']:
|
470
495
|
# not relevant if we also have a proxy, or the thing to capture is a data URI or a file on disk
|
@@ -1,10 +1,10 @@
|
|
1
1
|
lacuscore/__init__.py,sha256=aLBshQPT9IBDKn5qWrX9A_exqtLFPyLsQiPWdfpAFjA,537
|
2
2
|
lacuscore/helpers.py,sha256=dTt-FM7SnwEgIeGTCAvREwa1K224iNb16mmaCwcY_eg,14096
|
3
3
|
lacuscore/lacus_monitoring.py,sha256=r6IaYuh6sMq43eOWdZx0fU8p4PWVZlqSD6nr6yOaTUU,2713
|
4
|
-
lacuscore/lacuscore.py,sha256=
|
4
|
+
lacuscore/lacuscore.py,sha256=uz5GJoPZrBTfsWNaCm6Bealc5JIOy7GYe78EqoazDyc,47025
|
5
5
|
lacuscore/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
6
|
lacuscore/task_logger.py,sha256=2wDotU6r6vn-aKO8nZNdxSuisSj11LlcxuvW60qPL0Y,1909
|
7
|
-
lacuscore-1.17.
|
8
|
-
lacuscore-1.17.
|
9
|
-
lacuscore-1.17.
|
10
|
-
lacuscore-1.17.
|
7
|
+
lacuscore-1.17.3.dist-info/LICENSE,sha256=4C4hLYrIkUD96Ggk-y_Go1Qf7PBZrEm9PSeTGe2nd4s,1516
|
8
|
+
lacuscore-1.17.3.dist-info/METADATA,sha256=TC73WMFDvZYoe6OBM_jZ1bg9qBNOvpa85QGdLrajtlI,2654
|
9
|
+
lacuscore-1.17.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
10
|
+
lacuscore-1.17.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|