lacuscore 1.17.1__py3-none-any.whl → 1.17.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lacuscore/lacuscore.py +38 -4
- {lacuscore-1.17.1.dist-info → lacuscore-1.17.3.dist-info}/METADATA +1 -1
- {lacuscore-1.17.1.dist-info → lacuscore-1.17.3.dist-info}/RECORD +5 -5
- {lacuscore-1.17.1.dist-info → lacuscore-1.17.3.dist-info}/LICENSE +0 -0
- {lacuscore-1.17.1.dist-info → lacuscore-1.17.3.dist-info}/WHEEL +0 -0
lacuscore/lacuscore.py
CHANGED
@@ -9,6 +9,7 @@ import os
|
|
9
9
|
import pickle
|
10
10
|
import random
|
11
11
|
import re
|
12
|
+
import socket
|
12
13
|
import sys
|
13
14
|
import time
|
14
15
|
import unicodedata
|
@@ -38,6 +39,7 @@ from redis.exceptions import DataError
|
|
38
39
|
|
39
40
|
from . import task_logger
|
40
41
|
from .helpers import (
|
42
|
+
LacusCoreException,
|
41
43
|
LacusCoreLogAdapter, CaptureError, RetryCapture, CaptureSettingsError,
|
42
44
|
CaptureStatus, CaptureResponse, CaptureResponseJson, CaptureSettings)
|
43
45
|
|
@@ -80,13 +82,27 @@ def _secure_filename(filename: str) -> str:
|
|
80
82
|
return filename
|
81
83
|
|
82
84
|
|
85
|
+
def _check_proxy_port_open(proxy: dict[str, str] | str) -> bool:
|
86
|
+
if isinstance(proxy, dict):
|
87
|
+
to_check = proxy['server']
|
88
|
+
else:
|
89
|
+
to_check = proxy
|
90
|
+
splitted_proxy_url = urlsplit(to_check)
|
91
|
+
if not splitted_proxy_url.hostname or not splitted_proxy_url.port:
|
92
|
+
raise LacusCoreException('Invalid pre-defined proxy (needs hostname and port): {proxy}')
|
93
|
+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
94
|
+
s.settimeout(3)
|
95
|
+
return s.connect_ex((splitted_proxy_url.hostname, splitted_proxy_url.port)) == 0
|
96
|
+
|
97
|
+
|
83
98
|
class LacusCore():
|
84
99
|
"""Capture URLs or web enabled documents using PlaywrightCapture.
|
85
100
|
|
86
101
|
:param redis_connector: Pre-configured connector to a redis instance.
|
87
102
|
:param max_capture_time: If the capture takes more than that time, break (in seconds)
|
88
103
|
:param expire_results: The capture results are stored in redis. Expire them after they are done (in seconds).
|
89
|
-
:param tor_proxy: URL to a
|
104
|
+
:param tor_proxy: URL to a SOCKS5 tor proxy. If you have tor installed, this is the default: socks5://127.0.0.1:9050.
|
105
|
+
:param i2p_proxy: URL to a HTTP I2P proxy. If you have i2p installed, this is the default: http://127.0.0.1:4444.
|
90
106
|
:param only_global_lookups: Discard captures that point to non-public IPs.
|
91
107
|
:param max_retries: How many times should we re-try a capture if it failed.
|
92
108
|
"""
|
@@ -94,7 +110,8 @@ class LacusCore():
|
|
94
110
|
def __init__(self, redis_connector: Redis[bytes], /, *,
|
95
111
|
max_capture_time: int=3600,
|
96
112
|
expire_results: int=36000,
|
97
|
-
tor_proxy: str | None=None,
|
113
|
+
tor_proxy: dict[str, str] | str | None=None,
|
114
|
+
i2p_proxy: dict[str, str] | str | None=None,
|
98
115
|
only_global_lookups: bool=True,
|
99
116
|
max_retries: int=3,
|
100
117
|
headed_allowed: bool=False,
|
@@ -105,7 +122,10 @@ class LacusCore():
|
|
105
122
|
self.redis = redis_connector
|
106
123
|
self.max_capture_time = max_capture_time
|
107
124
|
self.expire_results = expire_results
|
125
|
+
|
108
126
|
self.tor_proxy = tor_proxy
|
127
|
+
self.i2p_proxy = i2p_proxy
|
128
|
+
|
109
129
|
self.only_global_lookups = only_global_lookups
|
110
130
|
self.max_retries = max_retries
|
111
131
|
self.headed_allowed = headed_allowed
|
@@ -450,17 +470,31 @@ class LacusCore():
|
|
450
470
|
proxy = to_capture.proxy
|
451
471
|
if self.tor_proxy:
|
452
472
|
# check if onion or forced
|
453
|
-
if (proxy == 'force_tor' # if the proxy is set to "force_tor", we use the pre-configured tor proxy, regardless the URL.
|
473
|
+
if (proxy == 'force_tor' # if the proxy is set to "force_tor", we use the pre-configured tor proxy, regardless the URL, legacy feature.
|
454
474
|
or (not proxy # if the TLD is "onion", we use the pre-configured tor proxy
|
455
475
|
and splitted_url.netloc
|
456
476
|
and splitted_url.hostname
|
457
477
|
and splitted_url.hostname.split('.')[-1] == 'onion')):
|
478
|
+
if not _check_proxy_port_open(self.tor_proxy):
|
479
|
+
logger.critical(f'Unable to connect to the default tor proxy: {self.tor_proxy}')
|
480
|
+
raise CaptureError('The selected tor proxy is unreachable, unable to run the capture.')
|
458
481
|
proxy = self.tor_proxy
|
482
|
+
logger.info('Using the default tor proxy.')
|
483
|
+
if self.i2p_proxy:
|
484
|
+
if (not proxy # if the TLD is "i2p", we use the pre-configured I2P proxy
|
485
|
+
and splitted_url.netloc
|
486
|
+
and splitted_url.hostname
|
487
|
+
and splitted_url.hostname.split('.')[-1] == 'i2p'):
|
488
|
+
if not _check_proxy_port_open(self.i2p_proxy):
|
489
|
+
logger.critical(f'Unable to connect to the default tor proxy: {self.i2p_proxy}')
|
490
|
+
raise CaptureError('The selected I2P proxy is unreachable, unable to run the capture.')
|
491
|
+
proxy = self.i2p_proxy
|
492
|
+
logger.info('Using the default I2P proxy.')
|
459
493
|
|
460
494
|
if self.only_global_lookups and not proxy and splitted_url.scheme not in ['data', 'file']:
|
461
495
|
# not relevant if we also have a proxy, or the thing to capture is a data URI or a file on disk
|
462
496
|
if splitted_url.netloc:
|
463
|
-
if splitted_url.hostname and splitted_url.hostname.split('.')[-1]
|
497
|
+
if splitted_url.hostname and splitted_url.hostname.split('.')[-1] not in ['onion', 'i2p']:
|
464
498
|
ips_to_check = []
|
465
499
|
# check if the hostname is an IP
|
466
500
|
try:
|
@@ -1,10 +1,10 @@
|
|
1
1
|
lacuscore/__init__.py,sha256=aLBshQPT9IBDKn5qWrX9A_exqtLFPyLsQiPWdfpAFjA,537
|
2
2
|
lacuscore/helpers.py,sha256=dTt-FM7SnwEgIeGTCAvREwa1K224iNb16mmaCwcY_eg,14096
|
3
3
|
lacuscore/lacus_monitoring.py,sha256=r6IaYuh6sMq43eOWdZx0fU8p4PWVZlqSD6nr6yOaTUU,2713
|
4
|
-
lacuscore/lacuscore.py,sha256=
|
4
|
+
lacuscore/lacuscore.py,sha256=uz5GJoPZrBTfsWNaCm6Bealc5JIOy7GYe78EqoazDyc,47025
|
5
5
|
lacuscore/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
6
|
lacuscore/task_logger.py,sha256=2wDotU6r6vn-aKO8nZNdxSuisSj11LlcxuvW60qPL0Y,1909
|
7
|
-
lacuscore-1.17.
|
8
|
-
lacuscore-1.17.
|
9
|
-
lacuscore-1.17.
|
10
|
-
lacuscore-1.17.
|
7
|
+
lacuscore-1.17.3.dist-info/LICENSE,sha256=4C4hLYrIkUD96Ggk-y_Go1Qf7PBZrEm9PSeTGe2nd4s,1516
|
8
|
+
lacuscore-1.17.3.dist-info/METADATA,sha256=TC73WMFDvZYoe6OBM_jZ1bg9qBNOvpa85QGdLrajtlI,2654
|
9
|
+
lacuscore-1.17.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
10
|
+
lacuscore-1.17.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|