lacuscore 1.17.1__tar.gz → 1.17.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lacuscore
3
- Version: 1.17.1
3
+ Version: 1.17.3
4
4
  Summary: Core of Lacus, usable as a module
5
5
  License: BSD-3-Clause
6
6
  Author: Raphaël Vinot
@@ -9,6 +9,7 @@ import os
9
9
  import pickle
10
10
  import random
11
11
  import re
12
+ import socket
12
13
  import sys
13
14
  import time
14
15
  import unicodedata
@@ -38,6 +39,7 @@ from redis.exceptions import DataError
38
39
 
39
40
  from . import task_logger
40
41
  from .helpers import (
42
+ LacusCoreException,
41
43
  LacusCoreLogAdapter, CaptureError, RetryCapture, CaptureSettingsError,
42
44
  CaptureStatus, CaptureResponse, CaptureResponseJson, CaptureSettings)
43
45
 
@@ -80,13 +82,27 @@ def _secure_filename(filename: str) -> str:
80
82
  return filename
81
83
 
82
84
 
85
+ def _check_proxy_port_open(proxy: dict[str, str] | str) -> bool:
86
+ if isinstance(proxy, dict):
87
+ to_check = proxy['server']
88
+ else:
89
+ to_check = proxy
90
+ splitted_proxy_url = urlsplit(to_check)
91
+ if not splitted_proxy_url.hostname or not splitted_proxy_url.port:
92
+ raise LacusCoreException('Invalid pre-defined proxy (needs hostname and port): {proxy}')
93
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
94
+ s.settimeout(3)
95
+ return s.connect_ex((splitted_proxy_url.hostname, splitted_proxy_url.port)) == 0
96
+
97
+
83
98
  class LacusCore():
84
99
  """Capture URLs or web enabled documents using PlaywrightCapture.
85
100
 
86
101
  :param redis_connector: Pre-configured connector to a redis instance.
87
102
  :param max_capture_time: If the capture takes more than that time, break (in seconds)
88
103
  :param expire_results: The capture results are stored in redis. Expire them after they are done (in seconds).
89
- :param tor_proxy: URL to a SOCKS 5 tor proxy. If you have tor installed, this is the default: socks5://127.0.0.1:9050.
104
+ :param tor_proxy: URL to a SOCKS5 tor proxy. If you have tor installed, this is the default: socks5://127.0.0.1:9050.
105
+ :param i2p_proxy: URL to an HTTP I2P proxy. If you have i2p installed, this is the default: http://127.0.0.1:4444.
90
106
  :param only_global_lookups: Discard captures that point to non-public IPs.
91
107
  :param max_retries: How many times should we re-try a capture if it failed.
92
108
  """
@@ -94,7 +110,8 @@ class LacusCore():
94
110
  def __init__(self, redis_connector: Redis[bytes], /, *,
95
111
  max_capture_time: int=3600,
96
112
  expire_results: int=36000,
97
- tor_proxy: str | None=None,
113
+ tor_proxy: dict[str, str] | str | None=None,
114
+ i2p_proxy: dict[str, str] | str | None=None,
98
115
  only_global_lookups: bool=True,
99
116
  max_retries: int=3,
100
117
  headed_allowed: bool=False,
@@ -105,7 +122,10 @@ class LacusCore():
105
122
  self.redis = redis_connector
106
123
  self.max_capture_time = max_capture_time
107
124
  self.expire_results = expire_results
125
+
108
126
  self.tor_proxy = tor_proxy
127
+ self.i2p_proxy = i2p_proxy
128
+
109
129
  self.only_global_lookups = only_global_lookups
110
130
  self.max_retries = max_retries
111
131
  self.headed_allowed = headed_allowed
@@ -450,17 +470,31 @@ class LacusCore():
450
470
  proxy = to_capture.proxy
451
471
  if self.tor_proxy:
452
472
  # check if onion or forced
453
- if (proxy == 'force_tor' # if the proxy is set to "force_tor", we use the pre-configured tor proxy, regardless the URL.
473
+ if (proxy == 'force_tor' # if the proxy is set to "force_tor", we use the pre-configured tor proxy, regardless the URL, legacy feature.
454
474
  or (not proxy # if the TLD is "onion", we use the pre-configured tor proxy
455
475
  and splitted_url.netloc
456
476
  and splitted_url.hostname
457
477
  and splitted_url.hostname.split('.')[-1] == 'onion')):
478
+ if not _check_proxy_port_open(self.tor_proxy):
479
+ logger.critical(f'Unable to connect to the default tor proxy: {self.tor_proxy}')
480
+ raise CaptureError('The selected tor proxy is unreachable, unable to run the capture.')
458
481
  proxy = self.tor_proxy
482
+ logger.info('Using the default tor proxy.')
483
+ if self.i2p_proxy:
484
+ if (not proxy # if the TLD is "i2p", we use the pre-configured I2P proxy
485
+ and splitted_url.netloc
486
+ and splitted_url.hostname
487
+ and splitted_url.hostname.split('.')[-1] == 'i2p'):
488
+ if not _check_proxy_port_open(self.i2p_proxy):
489
+ logger.critical(f'Unable to connect to the default tor proxy: {self.i2p_proxy}')
490
+ raise CaptureError('The selected I2P proxy is unreachable, unable to run the capture.')
491
+ proxy = self.i2p_proxy
492
+ logger.info('Using the default I2P proxy.')
459
493
 
460
494
  if self.only_global_lookups and not proxy and splitted_url.scheme not in ['data', 'file']:
461
495
  # not relevant if we also have a proxy, or the thing to capture is a data URI or a file on disk
462
496
  if splitted_url.netloc:
463
- if splitted_url.hostname and splitted_url.hostname.split('.')[-1] != 'onion':
497
+ if splitted_url.hostname and splitted_url.hostname.split('.')[-1] not in ['onion', 'i2p']:
464
498
  ips_to_check = []
465
499
  # check if the hostname is an IP
466
500
  try:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "lacuscore"
3
- version = "1.17.1"
3
+ version = "1.17.3"
4
4
  description = "Core of Lacus, usable as a module"
5
5
  authors = [
6
6
  {name="Raphaël Vinot", email="raphael.vinot@circl.lu"}
File without changes
File without changes