lacuscore 1.17.2__tar.gz → 1.17.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lacuscore
3
- Version: 1.17.2
3
+ Version: 1.17.3
4
4
  Summary: Core of Lacus, usable as a module
5
5
  License: BSD-3-Clause
6
6
  Author: Raphaël Vinot
@@ -9,6 +9,7 @@ import os
9
9
  import pickle
10
10
  import random
11
11
  import re
12
+ import socket
12
13
  import sys
13
14
  import time
14
15
  import unicodedata
@@ -38,6 +39,7 @@ from redis.exceptions import DataError
38
39
 
39
40
  from . import task_logger
40
41
  from .helpers import (
42
+ LacusCoreException,
41
43
  LacusCoreLogAdapter, CaptureError, RetryCapture, CaptureSettingsError,
42
44
  CaptureStatus, CaptureResponse, CaptureResponseJson, CaptureSettings)
43
45
 
@@ -80,6 +82,19 @@ def _secure_filename(filename: str) -> str:
80
82
  return filename
81
83
 
82
84
 
85
+ def _check_proxy_port_open(proxy: dict[str, str] | str) -> bool:
86
+ if isinstance(proxy, dict):
87
+ to_check = proxy['server']
88
+ else:
89
+ to_check = proxy
90
+ splitted_proxy_url = urlsplit(to_check)
91
+ if not splitted_proxy_url.hostname or not splitted_proxy_url.port:
92
+ raise LacusCoreException('Invalid pre-defined proxy (needs hostname and port): {proxy}')
93
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
94
+ s.settimeout(3)
95
+ return s.connect_ex((splitted_proxy_url.hostname, splitted_proxy_url.port)) == 0
96
+
97
+
83
98
  class LacusCore():
84
99
  """Capture URLs or web enabled documents using PlaywrightCapture.
85
100
 
@@ -95,8 +110,8 @@ class LacusCore():
95
110
  def __init__(self, redis_connector: Redis[bytes], /, *,
96
111
  max_capture_time: int=3600,
97
112
  expire_results: int=36000,
98
- tor_proxy: str | None=None,
99
- i2p_proxy: str | None=None,
113
+ tor_proxy: dict[str, str] | str | None=None,
114
+ i2p_proxy: dict[str, str] | str | None=None,
100
115
  only_global_lookups: bool=True,
101
116
  max_retries: int=3,
102
117
  headed_allowed: bool=False,
@@ -107,8 +122,10 @@ class LacusCore():
107
122
  self.redis = redis_connector
108
123
  self.max_capture_time = max_capture_time
109
124
  self.expire_results = expire_results
125
+
110
126
  self.tor_proxy = tor_proxy
111
127
  self.i2p_proxy = i2p_proxy
128
+
112
129
  self.only_global_lookups = only_global_lookups
113
130
  self.max_retries = max_retries
114
131
  self.headed_allowed = headed_allowed
@@ -453,18 +470,26 @@ class LacusCore():
453
470
  proxy = to_capture.proxy
454
471
  if self.tor_proxy:
455
472
  # check if onion or forced
456
- if (proxy == 'force_tor' # if the proxy is set to "force_tor", we use the pre-configured tor proxy, regardless the URL.
473
+ if (proxy == 'force_tor' # if the proxy is set to "force_tor", we use the pre-configured tor proxy, regardless the URL, legacy feature.
457
474
  or (not proxy # if the TLD is "onion", we use the pre-configured tor proxy
458
475
  and splitted_url.netloc
459
476
  and splitted_url.hostname
460
477
  and splitted_url.hostname.split('.')[-1] == 'onion')):
478
+ if not _check_proxy_port_open(self.tor_proxy):
479
+ logger.critical(f'Unable to connect to the default tor proxy: {self.tor_proxy}')
480
+ raise CaptureError('The selected tor proxy is unreachable, unable to run the capture.')
461
481
  proxy = self.tor_proxy
462
- elif self.i2p_proxy:
482
+ logger.info('Using the default tor proxy.')
483
+ if self.i2p_proxy:
463
484
  if (not proxy # if the TLD is "i2p", we use the pre-configured I2P proxy
464
485
  and splitted_url.netloc
465
486
  and splitted_url.hostname
466
487
  and splitted_url.hostname.split('.')[-1] == 'i2p'):
488
+ if not _check_proxy_port_open(self.i2p_proxy):
489
+ logger.critical(f'Unable to connect to the default tor proxy: {self.i2p_proxy}')
490
+ raise CaptureError('The selected I2P proxy is unreachable, unable to run the capture.')
467
491
  proxy = self.i2p_proxy
492
+ logger.info('Using the default I2P proxy.')
468
493
 
469
494
  if self.only_global_lookups and not proxy and splitted_url.scheme not in ['data', 'file']:
470
495
  # not relevant if we also have a proxy, or the thing to capture is a data URI or a file on disk
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "lacuscore"
3
- version = "1.17.2"
3
+ version = "1.17.3"
4
4
  description = "Core of Lacus, usable as a module"
5
5
  authors = [
6
6
  {name="Raphaël Vinot", email="raphael.vinot@circl.lu"}
File without changes
File without changes