lacuscore 1.17.3__py3-none-any.whl → 1.17.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lacuscore/helpers.py CHANGED
@@ -88,6 +88,7 @@ class CaptureResponseJson(TypedDict, total=False):
88
88
  downloaded_filename: str | None
89
89
  downloaded_file: str | None
90
90
  children: list[CaptureResponseJson] | None
91
+ trusted_timestamps: dict[str, str] | None
91
92
  runtime: float | None
92
93
  potential_favicons: list[str] | None
93
94
 
@@ -122,6 +123,7 @@ class CaptureSettings(BaseModel):
122
123
  allow_tracking: bool = False
123
124
  headless: bool = True
124
125
  init_script: str | None = None
126
+ with_trusted_timestamps: bool = False
125
127
  force: bool = False
126
128
  recapture_interval: int = 300
127
129
  priority: int = 0
lacuscore/lacuscore.py CHANGED
@@ -31,7 +31,7 @@ from dns.asyncresolver import Resolver
31
31
  from dns.exception import DNSException
32
32
  from dns.exception import Timeout as DNSTimeout
33
33
 
34
- from playwrightcapture import Capture, PlaywrightCaptureException, InvalidPlaywrightParameter
34
+ from playwrightcapture import Capture, PlaywrightCaptureException, InvalidPlaywrightParameter, TrustedTimestampSettings
35
35
  from pydantic import ValidationError
36
36
  from redis import Redis
37
37
  from redis.exceptions import ConnectionError as RedisConnectionError
@@ -115,6 +115,7 @@ class LacusCore():
115
115
  only_global_lookups: bool=True,
116
116
  max_retries: int=3,
117
117
  headed_allowed: bool=False,
118
+ tt_settings: TrustedTimestampSettings | None=None,
118
119
  loglevel: str | int='INFO') -> None:
119
120
  self.master_logger = logging.getLogger(f'{self.__class__.__name__}')
120
121
  self.master_logger.setLevel(loglevel)
@@ -126,6 +127,8 @@ class LacusCore():
126
127
  self.tor_proxy = tor_proxy
127
128
  self.i2p_proxy = i2p_proxy
128
129
 
130
+ self.tt_settings = tt_settings
131
+
129
132
  self.only_global_lookups = only_global_lookups
130
133
  self.max_retries = max_retries
131
134
  self.headed_allowed = headed_allowed
@@ -167,6 +170,7 @@ class LacusCore():
167
170
  rendered_hostname_only: bool=True,
168
171
  with_screenshot: bool=True,
169
172
  with_favicon: bool=False,
173
+ with_trusted_timestamps: bool=False,
170
174
  allow_tracking: bool=False,
171
175
  headless: bool=True,
172
176
  max_retries: int | None=None,
@@ -202,6 +206,7 @@ class LacusCore():
202
206
  rendered_hostname_only: bool=True,
203
207
  with_screenshot: bool=True,
204
208
  with_favicon: bool=False,
209
+ with_trusted_timestamps: bool=False,
205
210
  allow_tracking: bool=False,
206
211
  headless: bool=True,
207
212
  max_retries: int | None=None,
@@ -239,6 +244,7 @@ class LacusCore():
239
244
  :param rendered_hostname_only: If depth > 0: only capture URLs with the same hostname as the rendered page
240
245
  :param with_screenshot: If False, PlaywrightCapture won't take a screenshot of the rendered URL
241
246
  :param with_favicon: If True, PlaywrightCapture will attempt to get the potential favicons for the rendered URL. It is a dirty trick, see this issue for details: https://github.com/Lookyloo/PlaywrightCapture/issues/45
247
+ :param with_trusted_timestamps: If True, PlaywrightCapture will trigger calls to a remote timestamp service. For that to work, this class must have been initialized with tt_settings. See RFC3161 for details: https://www.rfc-editor.org/rfc/rfc3161
242
248
  :param allow_tracking: If True, PlaywrightCapture will attempt to click through the cookie banners. It is totally dependent on the framework used on the website.
243
249
  :param headless: Whether to run the browser in headless mode. WARNING: requires to run in a graphical environment.
244
250
  :param max_retries: The maximum anount of retries for this capture
@@ -263,6 +269,7 @@ class LacusCore():
263
269
  'color_scheme': color_scheme, 'java_script_enabled': java_script_enabled,
264
270
  'viewport': viewport, 'referer': referer,
265
271
  'with_screenshot': with_screenshot, 'with_favicon': with_favicon,
272
+ 'with_trusted_timestamps': with_trusted_timestamps,
266
273
  'allow_tracking': allow_tracking,
267
274
  # Quietly force it to true if headed is not allowed.
268
275
  'headless': headless if self.headed_allowed else True,
@@ -294,6 +301,10 @@ class LacusCore():
294
301
  else:
295
302
  perma_uuid = str(uuid4())
296
303
 
304
+ if to_enqueue.with_trusted_timestamps and not self.tt_settings:
305
+ self.master_logger.warning('Cannot trigger trusted timestamp, the remote timestamper service settings are missing.')
306
+ to_enqueue.with_trusted_timestamps = False
307
+
297
308
  p = self.redis.pipeline()
298
309
  p.set(f'lacus:query_hash:{hash_query}', perma_uuid, nx=True, ex=recapture_interval)
299
310
  p.hset(f'lacus:capture_settings:{perma_uuid}', mapping=to_enqueue.redis_dump())
@@ -568,6 +579,7 @@ class LacusCore():
568
579
  loglevel=self.master_logger.getEffectiveLevel(),
569
580
  headless=to_capture.headless,
570
581
  init_script=to_capture.init_script,
582
+ tt_settings=self.tt_settings,
571
583
  uuid=uuid) as capture:
572
584
  # required by Mypy: https://github.com/python/mypy/issues/3004
573
585
  capture.headers = to_capture.headers
@@ -601,6 +613,7 @@ class LacusCore():
601
613
  with_screenshot=to_capture.with_screenshot,
602
614
  with_favicon=to_capture.with_favicon,
603
615
  allow_tracking=to_capture.allow_tracking,
616
+ with_trusted_timestamps=to_capture.with_trusted_timestamps,
604
617
  max_depth_capture_time=self.max_capture_time)
605
618
  except (TimeoutError, asyncio.exceptions.TimeoutError):
606
619
  timeout_expired(capture_timeout, logger, 'Capture took too long.')
@@ -757,6 +770,8 @@ class LacusCore():
757
770
  if results.get('html') and results['html'] is not None:
758
771
  # Need to avoid unicode encore errors, and surrogates are not allowed
759
772
  hash_to_set['html'] = results['html'].encode('utf-8', 'surrogateescape')
773
+ if results.get('trusted_timestamps'):
774
+ hash_to_set['trusted_timestamps'] = pickle.dumps(results['trusted_timestamps'])
760
775
  if 'children' in results and results['children'] is not None:
761
776
  padding_length = len(str(len(results['children'])))
762
777
  children = set()
@@ -771,11 +786,10 @@ class LacusCore():
771
786
  hash_to_set['children'] = pickle.dumps(children)
772
787
 
773
788
  for key in results.keys():
774
- if key in ['har', 'cookies', 'storage', 'potential_favicons', 'html', 'children'] or not results.get(key):
789
+ if key in ['har', 'cookies', 'storage', 'trusted_timestamps', 'potential_favicons', 'html', 'children'] or not results.get(key):
775
790
  continue
776
791
  # these entries can be stored directly
777
792
  hash_to_set[key] = results[key] # type: ignore[literal-required]
778
-
779
793
  if hash_to_set:
780
794
  pipeline.hset(root_key, mapping=hash_to_set) # type: ignore[arg-type]
781
795
  # Make sure the key expires
@@ -800,6 +814,8 @@ class LacusCore():
800
814
  to_return['storage'] = pickle.loads(value)
801
815
  elif key == b'potential_favicons':
802
816
  to_return['potential_favicons'] = pickle.loads(value)
817
+ elif key == b'trusted_timestamps':
818
+ to_return['trusted_timestamps'] = pickle.loads(value)
803
819
  elif key == b'children':
804
820
  to_return['children'] = []
805
821
  for child_root_key in sorted(pickle.loads(value)):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lacuscore
3
- Version: 1.17.3
3
+ Version: 1.17.4
4
4
  Summary: Core of Lacus, usable as a module
5
5
  License: BSD-3-Clause
6
6
  Author: Raphaël Vinot
@@ -24,9 +24,10 @@ Provides-Extra: docs
24
24
  Requires-Dist: Sphinx (>=8.2.3) ; (python_version >= "3.11") and (extra == "docs")
25
25
  Requires-Dist: async-timeout (>=5.0.1) ; python_version < "3.11"
26
26
  Requires-Dist: defang (>=0.5.3)
27
- Requires-Dist: dnspython (>=2.7.0)
27
+ Requires-Dist: dnspython (<2.8) ; python_version < "3.10"
28
+ Requires-Dist: dnspython (>=2.8.0) ; python_version >= "3.10"
28
29
  Requires-Dist: eval-type-backport (>=0.2.2) ; python_version < "3.10"
29
- Requires-Dist: playwrightcapture[recaptcha] (>=1.32.1)
30
+ Requires-Dist: playwrightcapture[recaptcha] (>=1.32.2)
30
31
  Requires-Dist: pydantic (>=2.11.7)
31
32
  Requires-Dist: redis[hiredis] (>=5.3.0,<6.0.0)
32
33
  Requires-Dist: requests (>=2.32.5)
@@ -0,0 +1,10 @@
1
+ lacuscore/__init__.py,sha256=aLBshQPT9IBDKn5qWrX9A_exqtLFPyLsQiPWdfpAFjA,537
2
+ lacuscore/helpers.py,sha256=E_pgZ6vgtOkb_NCIjgy1c041CSy00arL739hPHVxlS8,14184
3
+ lacuscore/lacus_monitoring.py,sha256=r6IaYuh6sMq43eOWdZx0fU8p4PWVZlqSD6nr6yOaTUU,2713
4
+ lacuscore/lacuscore.py,sha256=jrFGBFSYTWRquZsZx7KQAfDc8KTT_nfu7-c6mnrh-pk,48275
5
+ lacuscore/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ lacuscore/task_logger.py,sha256=2wDotU6r6vn-aKO8nZNdxSuisSj11LlcxuvW60qPL0Y,1909
7
+ lacuscore-1.17.4.dist-info/LICENSE,sha256=4C4hLYrIkUD96Ggk-y_Go1Qf7PBZrEm9PSeTGe2nd4s,1516
8
+ lacuscore-1.17.4.dist-info/METADATA,sha256=LCRvwvOmSz65zlMO1iWUjpJ5lGnPVhGm8uRQ1juGBm4,2739
9
+ lacuscore-1.17.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
10
+ lacuscore-1.17.4.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- lacuscore/__init__.py,sha256=aLBshQPT9IBDKn5qWrX9A_exqtLFPyLsQiPWdfpAFjA,537
2
- lacuscore/helpers.py,sha256=dTt-FM7SnwEgIeGTCAvREwa1K224iNb16mmaCwcY_eg,14096
3
- lacuscore/lacus_monitoring.py,sha256=r6IaYuh6sMq43eOWdZx0fU8p4PWVZlqSD6nr6yOaTUU,2713
4
- lacuscore/lacuscore.py,sha256=uz5GJoPZrBTfsWNaCm6Bealc5JIOy7GYe78EqoazDyc,47025
5
- lacuscore/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- lacuscore/task_logger.py,sha256=2wDotU6r6vn-aKO8nZNdxSuisSj11LlcxuvW60qPL0Y,1909
7
- lacuscore-1.17.3.dist-info/LICENSE,sha256=4C4hLYrIkUD96Ggk-y_Go1Qf7PBZrEm9PSeTGe2nd4s,1516
8
- lacuscore-1.17.3.dist-info/METADATA,sha256=TC73WMFDvZYoe6OBM_jZ1bg9qBNOvpa85QGdLrajtlI,2654
9
- lacuscore-1.17.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
10
- lacuscore-1.17.3.dist-info/RECORD,,