lacuscore 1.17.3__tar.gz → 1.17.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lacuscore-1.17.3 → lacuscore-1.17.4}/PKG-INFO +4 -3
- {lacuscore-1.17.3 → lacuscore-1.17.4}/lacuscore/helpers.py +2 -0
- {lacuscore-1.17.3 → lacuscore-1.17.4}/lacuscore/lacuscore.py +19 -3
- {lacuscore-1.17.3 → lacuscore-1.17.4}/pyproject.toml +4 -3
- {lacuscore-1.17.3 → lacuscore-1.17.4}/LICENSE +0 -0
- {lacuscore-1.17.3 → lacuscore-1.17.4}/README.md +0 -0
- {lacuscore-1.17.3 → lacuscore-1.17.4}/lacuscore/__init__.py +0 -0
- {lacuscore-1.17.3 → lacuscore-1.17.4}/lacuscore/lacus_monitoring.py +0 -0
- {lacuscore-1.17.3 → lacuscore-1.17.4}/lacuscore/py.typed +0 -0
- {lacuscore-1.17.3 → lacuscore-1.17.4}/lacuscore/task_logger.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: lacuscore
|
3
|
-
Version: 1.17.3
|
3
|
+
Version: 1.17.4
|
4
4
|
Summary: Core of Lacus, usable as a module
|
5
5
|
License: BSD-3-Clause
|
6
6
|
Author: Raphaël Vinot
|
@@ -24,9 +24,10 @@ Provides-Extra: docs
|
|
24
24
|
Requires-Dist: Sphinx (>=8.2.3) ; (python_version >= "3.11") and (extra == "docs")
|
25
25
|
Requires-Dist: async-timeout (>=5.0.1) ; python_version < "3.11"
|
26
26
|
Requires-Dist: defang (>=0.5.3)
|
27
|
-
Requires-Dist: dnspython (
|
27
|
+
Requires-Dist: dnspython (<2.8) ; python_version < "3.10"
|
28
|
+
Requires-Dist: dnspython (>=2.8.0) ; python_version >= "3.10"
|
28
29
|
Requires-Dist: eval-type-backport (>=0.2.2) ; python_version < "3.10"
|
29
|
-
Requires-Dist: playwrightcapture[recaptcha] (>=1.32.
|
30
|
+
Requires-Dist: playwrightcapture[recaptcha] (>=1.32.2)
|
30
31
|
Requires-Dist: pydantic (>=2.11.7)
|
31
32
|
Requires-Dist: redis[hiredis] (>=5.3.0,<6.0.0)
|
32
33
|
Requires-Dist: requests (>=2.32.5)
|
@@ -88,6 +88,7 @@ class CaptureResponseJson(TypedDict, total=False):
|
|
88
88
|
downloaded_filename: str | None
|
89
89
|
downloaded_file: str | None
|
90
90
|
children: list[CaptureResponseJson] | None
|
91
|
+
trusted_timestamps: dict[str, str] | None
|
91
92
|
runtime: float | None
|
92
93
|
potential_favicons: list[str] | None
|
93
94
|
|
@@ -122,6 +123,7 @@ class CaptureSettings(BaseModel):
|
|
122
123
|
allow_tracking: bool = False
|
123
124
|
headless: bool = True
|
124
125
|
init_script: str | None = None
|
126
|
+
with_trusted_timestamps: bool = False
|
125
127
|
force: bool = False
|
126
128
|
recapture_interval: int = 300
|
127
129
|
priority: int = 0
|
@@ -31,7 +31,7 @@ from dns.asyncresolver import Resolver
|
|
31
31
|
from dns.exception import DNSException
|
32
32
|
from dns.exception import Timeout as DNSTimeout
|
33
33
|
|
34
|
-
from playwrightcapture import Capture, PlaywrightCaptureException, InvalidPlaywrightParameter
|
34
|
+
from playwrightcapture import Capture, PlaywrightCaptureException, InvalidPlaywrightParameter, TrustedTimestampSettings
|
35
35
|
from pydantic import ValidationError
|
36
36
|
from redis import Redis
|
37
37
|
from redis.exceptions import ConnectionError as RedisConnectionError
|
@@ -115,6 +115,7 @@ class LacusCore():
|
|
115
115
|
only_global_lookups: bool=True,
|
116
116
|
max_retries: int=3,
|
117
117
|
headed_allowed: bool=False,
|
118
|
+
tt_settings: TrustedTimestampSettings | None=None,
|
118
119
|
loglevel: str | int='INFO') -> None:
|
119
120
|
self.master_logger = logging.getLogger(f'{self.__class__.__name__}')
|
120
121
|
self.master_logger.setLevel(loglevel)
|
@@ -126,6 +127,8 @@ class LacusCore():
|
|
126
127
|
self.tor_proxy = tor_proxy
|
127
128
|
self.i2p_proxy = i2p_proxy
|
128
129
|
|
130
|
+
self.tt_settings = tt_settings
|
131
|
+
|
129
132
|
self.only_global_lookups = only_global_lookups
|
130
133
|
self.max_retries = max_retries
|
131
134
|
self.headed_allowed = headed_allowed
|
@@ -167,6 +170,7 @@ class LacusCore():
|
|
167
170
|
rendered_hostname_only: bool=True,
|
168
171
|
with_screenshot: bool=True,
|
169
172
|
with_favicon: bool=False,
|
173
|
+
with_trusted_timestamps: bool=False,
|
170
174
|
allow_tracking: bool=False,
|
171
175
|
headless: bool=True,
|
172
176
|
max_retries: int | None=None,
|
@@ -202,6 +206,7 @@ class LacusCore():
|
|
202
206
|
rendered_hostname_only: bool=True,
|
203
207
|
with_screenshot: bool=True,
|
204
208
|
with_favicon: bool=False,
|
209
|
+
with_trusted_timestamps: bool=False,
|
205
210
|
allow_tracking: bool=False,
|
206
211
|
headless: bool=True,
|
207
212
|
max_retries: int | None=None,
|
@@ -239,6 +244,7 @@ class LacusCore():
|
|
239
244
|
:param rendered_hostname_only: If depth > 0: only capture URLs with the same hostname as the rendered page
|
240
245
|
:param with_screenshot: If False, PlaywrightCapture won't take a screenshot of the rendered URL
|
241
246
|
:param with_favicon: If True, PlaywrightCapture will attempt to get the potential favicons for the rendered URL. It is a dirty trick, see this issue for details: https://github.com/Lookyloo/PlaywrightCapture/issues/45
|
247
|
+
:param with_trusted_timestamps: If True, PlaywrightCapture will trigger calls to a remote timestamp service. For that to work, this class must have been initialized with tt_settings. See RFC3161 for details: https://www.rfc-editor.org/rfc/rfc3161
|
242
248
|
:param allow_tracking: If True, PlaywrightCapture will attempt to click through the cookie banners. It is totally dependent on the framework used on the website.
|
243
249
|
:param headless: Whether to run the browser in headless mode. WARNING: requires to run in a graphical environment.
|
244
250
|
:param max_retries: The maximum anount of retries for this capture
|
@@ -263,6 +269,7 @@ class LacusCore():
|
|
263
269
|
'color_scheme': color_scheme, 'java_script_enabled': java_script_enabled,
|
264
270
|
'viewport': viewport, 'referer': referer,
|
265
271
|
'with_screenshot': with_screenshot, 'with_favicon': with_favicon,
|
272
|
+
'with_trusted_timestamps': with_trusted_timestamps,
|
266
273
|
'allow_tracking': allow_tracking,
|
267
274
|
# Quietly force it to true if headed is not allowed.
|
268
275
|
'headless': headless if self.headed_allowed else True,
|
@@ -294,6 +301,10 @@ class LacusCore():
|
|
294
301
|
else:
|
295
302
|
perma_uuid = str(uuid4())
|
296
303
|
|
304
|
+
if to_enqueue.with_trusted_timestamps and not self.tt_settings:
|
305
|
+
self.master_logger.warning('Cannot trigger trusted timestamp, the remote timestamper service settings are missing.')
|
306
|
+
to_enqueue.with_trusted_timestamps = False
|
307
|
+
|
297
308
|
p = self.redis.pipeline()
|
298
309
|
p.set(f'lacus:query_hash:{hash_query}', perma_uuid, nx=True, ex=recapture_interval)
|
299
310
|
p.hset(f'lacus:capture_settings:{perma_uuid}', mapping=to_enqueue.redis_dump())
|
@@ -568,6 +579,7 @@ class LacusCore():
|
|
568
579
|
loglevel=self.master_logger.getEffectiveLevel(),
|
569
580
|
headless=to_capture.headless,
|
570
581
|
init_script=to_capture.init_script,
|
582
|
+
tt_settings=self.tt_settings,
|
571
583
|
uuid=uuid) as capture:
|
572
584
|
# required by Mypy: https://github.com/python/mypy/issues/3004
|
573
585
|
capture.headers = to_capture.headers
|
@@ -601,6 +613,7 @@ class LacusCore():
|
|
601
613
|
with_screenshot=to_capture.with_screenshot,
|
602
614
|
with_favicon=to_capture.with_favicon,
|
603
615
|
allow_tracking=to_capture.allow_tracking,
|
616
|
+
with_trusted_timestamps=to_capture.with_trusted_timestamps,
|
604
617
|
max_depth_capture_time=self.max_capture_time)
|
605
618
|
except (TimeoutError, asyncio.exceptions.TimeoutError):
|
606
619
|
timeout_expired(capture_timeout, logger, 'Capture took too long.')
|
@@ -757,6 +770,8 @@ class LacusCore():
|
|
757
770
|
if results.get('html') and results['html'] is not None:
|
758
771
|
# Need to avoid unicode encore errors, and surrogates are not allowed
|
759
772
|
hash_to_set['html'] = results['html'].encode('utf-8', 'surrogateescape')
|
773
|
+
if results.get('trusted_timestamps'):
|
774
|
+
hash_to_set['trusted_timestamps'] = pickle.dumps(results['trusted_timestamps'])
|
760
775
|
if 'children' in results and results['children'] is not None:
|
761
776
|
padding_length = len(str(len(results['children'])))
|
762
777
|
children = set()
|
@@ -771,11 +786,10 @@ class LacusCore():
|
|
771
786
|
hash_to_set['children'] = pickle.dumps(children)
|
772
787
|
|
773
788
|
for key in results.keys():
|
774
|
-
if key in ['har', 'cookies', 'storage', 'potential_favicons', 'html', 'children'] or not results.get(key):
|
789
|
+
if key in ['har', 'cookies', 'storage', 'trusted_timestamps', 'potential_favicons', 'html', 'children'] or not results.get(key):
|
775
790
|
continue
|
776
791
|
# these entries can be stored directly
|
777
792
|
hash_to_set[key] = results[key] # type: ignore[literal-required]
|
778
|
-
|
779
793
|
if hash_to_set:
|
780
794
|
pipeline.hset(root_key, mapping=hash_to_set) # type: ignore[arg-type]
|
781
795
|
# Make sure the key expires
|
@@ -800,6 +814,8 @@ class LacusCore():
|
|
800
814
|
to_return['storage'] = pickle.loads(value)
|
801
815
|
elif key == b'potential_favicons':
|
802
816
|
to_return['potential_favicons'] = pickle.loads(value)
|
817
|
+
elif key == b'trusted_timestamps':
|
818
|
+
to_return['trusted_timestamps'] = pickle.loads(value)
|
803
819
|
elif key == b'children':
|
804
820
|
to_return['children'] = []
|
805
821
|
for child_root_key in sorted(pickle.loads(value)):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name = "lacuscore"
|
3
|
-
version = "1.17.3"
|
3
|
+
version = "1.17.4"
|
4
4
|
description = "Core of Lacus, usable as a module"
|
5
5
|
authors = [
|
6
6
|
{name="Raphaël Vinot", email="raphael.vinot@circl.lu"}
|
@@ -15,11 +15,12 @@ dynamic = [ "classifiers" ]
|
|
15
15
|
|
16
16
|
dependencies = [
|
17
17
|
"requests (>=2.32.5)",
|
18
|
-
"playwrightcapture[recaptcha] (>=1.32.
|
18
|
+
"playwrightcapture[recaptcha] (>=1.32.2)",
|
19
19
|
"defang (>=0.5.3)",
|
20
20
|
"ua-parser[regex] (>=1.0.1)",
|
21
21
|
"redis[hiredis] (>=5.3.0,<6.0.0)",
|
22
|
-
"dnspython (
|
22
|
+
"dnspython (<2.8) ; python_version < \"3.10\"",
|
23
|
+
"dnspython (>=2.8.0) ; python_version >= \"3.10\"",
|
23
24
|
"async-timeout (>=5.0.1) ; python_version < \"3.11\"",
|
24
25
|
"pydantic (>=2.11.7)",
|
25
26
|
"eval-type-backport (>=0.2.2) ; python_version < \"3.10\"",
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|