lacuscore 1.12.9__tar.gz → 1.13.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lacuscore-1.12.9 → lacuscore-1.13.0}/PKG-INFO +3 -4
- {lacuscore-1.12.9 → lacuscore-1.13.0}/lacuscore/helpers.py +1 -0
- {lacuscore-1.12.9 → lacuscore-1.13.0}/lacuscore/lacuscore.py +12 -8
- {lacuscore-1.12.9 → lacuscore-1.13.0}/pyproject.toml +5 -5
- {lacuscore-1.12.9 → lacuscore-1.13.0}/LICENSE +0 -0
- {lacuscore-1.12.9 → lacuscore-1.13.0}/README.md +0 -0
- {lacuscore-1.12.9 → lacuscore-1.13.0}/lacuscore/__init__.py +0 -0
- {lacuscore-1.12.9 → lacuscore-1.13.0}/lacuscore/lacus_monitoring.py +0 -0
- {lacuscore-1.12.9 → lacuscore-1.13.0}/lacuscore/py.typed +0 -0
- {lacuscore-1.12.9 → lacuscore-1.13.0}/lacuscore/task_logger.py +0 -0
@@ -1,11 +1,11 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: lacuscore
|
3
|
-
Version: 1.12.9
|
3
|
+
Version: 1.13.0
|
4
4
|
Summary: Core of Lacus, usable as a module
|
5
5
|
License: BSD-3-Clause
|
6
6
|
Author: Raphaël Vinot
|
7
7
|
Author-email: raphael.vinot@circl.lu
|
8
|
-
Requires-Python: >=3.9,<4.0
|
8
|
+
Requires-Python: >=3.9.2,<4.0
|
9
9
|
Classifier: Development Status :: 5 - Production/Stable
|
10
10
|
Classifier: Environment :: Console
|
11
11
|
Classifier: Intended Audience :: Information Technology
|
@@ -14,7 +14,6 @@ Classifier: Intended Audience :: Telecommunications Industry
|
|
14
14
|
Classifier: License :: OSI Approved :: BSD License
|
15
15
|
Classifier: Operating System :: POSIX :: Linux
|
16
16
|
Classifier: Programming Language :: Python :: 3
|
17
|
-
Classifier: Programming Language :: Python :: 3.9
|
18
17
|
Classifier: Programming Language :: Python :: 3.10
|
19
18
|
Classifier: Programming Language :: Python :: 3.11
|
20
19
|
Classifier: Programming Language :: Python :: 3.12
|
@@ -27,7 +26,7 @@ Requires-Dist: async-timeout (>=5.0.1) ; python_version < "3.11"
|
|
27
26
|
Requires-Dist: defang (>=0.5.3)
|
28
27
|
Requires-Dist: dnspython (>=2.7.0)
|
29
28
|
Requires-Dist: eval-type-backport (>=0.2.2) ; python_version < "3.10"
|
30
|
-
Requires-Dist: playwrightcapture[recaptcha] (>=1.
|
29
|
+
Requires-Dist: playwrightcapture[recaptcha] (>=1.28.0)
|
31
30
|
Requires-Dist: pydantic (>=2.10.6)
|
32
31
|
Requires-Dist: redis[hiredis] (>=5.2.1)
|
33
32
|
Requires-Dist: requests (>=2.32.3)
|
@@ -12,7 +12,6 @@ import re
|
|
12
12
|
import sys
|
13
13
|
import time
|
14
14
|
import unicodedata
|
15
|
-
import zlib
|
16
15
|
|
17
16
|
from asyncio import Task
|
18
17
|
from base64 import b64decode, b64encode
|
@@ -95,15 +94,19 @@ class LacusCore():
|
|
95
94
|
tor_proxy: str | None=None,
|
96
95
|
only_global_lookups: bool=True,
|
97
96
|
max_retries: int=3,
|
97
|
+
headed_allowed: bool=False,
|
98
98
|
loglevel: str | int='INFO') -> None:
|
99
99
|
self.master_logger = logging.getLogger(f'{self.__class__.__name__}')
|
100
100
|
self.master_logger.setLevel(loglevel)
|
101
|
+
|
101
102
|
self.redis = redis_connector
|
102
103
|
self.max_capture_time = max_capture_time
|
103
104
|
self.expire_results = expire_results
|
104
105
|
self.tor_proxy = tor_proxy
|
105
106
|
self.only_global_lookups = only_global_lookups
|
106
107
|
self.max_retries = max_retries
|
108
|
+
self.headed_allowed = headed_allowed
|
109
|
+
|
107
110
|
self.dnsresolver: Resolver = Resolver()
|
108
111
|
self.dnsresolver.cache = Cache(900)
|
109
112
|
self.dnsresolver.timeout = 2
|
@@ -139,6 +142,7 @@ class LacusCore():
|
|
139
142
|
rendered_hostname_only: bool=True,
|
140
143
|
with_favicon: bool=False,
|
141
144
|
allow_tracking: bool=False,
|
145
|
+
headless: bool=True,
|
142
146
|
max_retries: int | None=None,
|
143
147
|
force: bool=False,
|
144
148
|
recapture_interval: int=300,
|
@@ -169,6 +173,7 @@ class LacusCore():
|
|
169
173
|
rendered_hostname_only: bool=True,
|
170
174
|
with_favicon: bool=False,
|
171
175
|
allow_tracking: bool=False,
|
176
|
+
headless: bool=True,
|
172
177
|
max_retries: int | None=None,
|
173
178
|
force: bool=False,
|
174
179
|
recapture_interval: int=300,
|
@@ -201,6 +206,7 @@ class LacusCore():
|
|
201
206
|
:param rendered_hostname_only: If depth > 0: only capture URLs with the same hostname as the rendered page
|
202
207
|
:param with_favicon: If True, PlaywrightCapture will attempt to get the potential favicons for the rendered URL. It is a dirty trick, see this issue for details: https://github.com/Lookyloo/PlaywrightCapture/issues/45
|
203
208
|
:param allow_tracking: If True, PlaywrightCapture will attempt to click through the cookie banners. It is totally dependent on the framework used on the website.
|
209
|
+
:param headless: Whether to run the browser in headless mode. WARNING: requires to run in a graphical environment.
|
204
210
|
:param max_retries: The maximum anount of retries for this capture
|
205
211
|
|
206
212
|
:param force: Force recapture, even if the same one was already done within the recapture_interval
|
@@ -221,7 +227,10 @@ class LacusCore():
|
|
221
227
|
'timezone_id': timezone_id, 'locale': locale,
|
222
228
|
'color_scheme': color_scheme, 'java_script_enabled': java_script_enabled,
|
223
229
|
'viewport': viewport, 'referer': referer, 'with_favicon': with_favicon,
|
224
|
-
'allow_tracking': allow_tracking,
|
230
|
+
'allow_tracking': allow_tracking,
|
231
|
+
# Quietly force it to true if headed is not allowed.
|
232
|
+
'headless': headless if self.headed_allowed else True,
|
233
|
+
'max_retries': max_retries}
|
225
234
|
|
226
235
|
try:
|
227
236
|
to_enqueue = CaptureSettings(**settings)
|
@@ -488,6 +497,7 @@ class LacusCore():
|
|
488
497
|
proxy=proxy,
|
489
498
|
general_timeout_in_sec=to_capture.general_timeout_in_sec,
|
490
499
|
loglevel=self.master_logger.getEffectiveLevel(),
|
500
|
+
headless=to_capture.headless,
|
491
501
|
uuid=uuid) as capture:
|
492
502
|
# required by Mypy: https://github.com/python/mypy/issues/3004
|
493
503
|
capture.headers = to_capture.headers # type: ignore[assignment]
|
@@ -691,12 +701,6 @@ class LacusCore():
|
|
691
701
|
if root_key is None:
|
692
702
|
root_key = f'lacus:capture_results_hash:{capture_uuid}'
|
693
703
|
|
694
|
-
if not self.redis.exists(root_key):
|
695
|
-
if old_response := self.redis.get(f'lacus:capture_results:{capture_uuid}'):
|
696
|
-
# TODO: remove in 1.8.* - old format used last in 1.6, and kept no more than 10H in redis
|
697
|
-
return pickle.loads(zlib.decompress(old_response))
|
698
|
-
return None
|
699
|
-
|
700
704
|
# New format and capture done
|
701
705
|
|
702
706
|
to_return: CaptureResponse = {}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name = "lacuscore"
|
3
|
-
version = "1.12.9"
|
3
|
+
version = "1.13.0"
|
4
4
|
description = "Core of Lacus, usable as a module"
|
5
5
|
authors = [
|
6
6
|
{name="Raphaël Vinot", email="raphael.vinot@circl.lu"}
|
@@ -9,13 +9,13 @@ license = "BSD-3-Clause"
|
|
9
9
|
repository = "https://github.com/ail-project/LacusCore"
|
10
10
|
documentation = "https://lacuscore.readthedocs.io/en/latest/"
|
11
11
|
readme = "README.md"
|
12
|
-
requires-python = ">=3.9,<4.0"
|
12
|
+
requires-python = ">=3.9.2,<4.0"
|
13
13
|
|
14
14
|
dynamic = [ "classifiers" ]
|
15
15
|
|
16
16
|
dependencies = [
|
17
17
|
"requests (>=2.32.3)",
|
18
|
-
"playwrightcapture[recaptcha] (>=1.
|
18
|
+
"playwrightcapture[recaptcha] (>=1.28.0)",
|
19
19
|
"defang (>=0.5.3)",
|
20
20
|
"ua-parser[regex] (>=1.0.1)",
|
21
21
|
"redis [hiredis] (>=5.2.1)",
|
@@ -46,10 +46,10 @@ classifiers = [
|
|
46
46
|
docs = ["Sphinx (>=8.1.3) ; python_version >= \"3.10\""]
|
47
47
|
|
48
48
|
[tool.poetry.group.dev.dependencies]
|
49
|
-
mypy = "^1.
|
49
|
+
mypy = "^1.15.0"
|
50
50
|
types-redis = {version = "^4.6.0.20241004"}
|
51
51
|
types-requests = "^2.32.0.20241016"
|
52
|
-
types-beautifulsoup4 = "^4.12.0.
|
52
|
+
types-beautifulsoup4 = "^4.12.0.20250204"
|
53
53
|
pytest = "^8.3.4"
|
54
54
|
|
55
55
|
[build-system]
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|