lacuscore 1.7.10__tar.gz → 1.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lacuscore-1.7.10 → lacuscore-1.8.0}/PKG-INFO +2 -2
- {lacuscore-1.7.10 → lacuscore-1.8.0}/lacuscore/lacuscore.py +41 -35
- {lacuscore-1.7.10 → lacuscore-1.8.0}/pyproject.toml +5 -4
- {lacuscore-1.7.10 → lacuscore-1.8.0}/LICENSE +0 -0
- {lacuscore-1.7.10 → lacuscore-1.8.0}/README.md +0 -0
- {lacuscore-1.7.10 → lacuscore-1.8.0}/lacuscore/__init__.py +0 -0
- {lacuscore-1.7.10 → lacuscore-1.8.0}/lacuscore/lacus_monitoring.py +0 -0
- {lacuscore-1.7.10 → lacuscore-1.8.0}/lacuscore/py.typed +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lacuscore
|
3
|
-
Version: 1.7.10
|
3
|
+
Version: 1.8.0
|
4
4
|
Summary: Core of Lacus, usable as a module
|
5
5
|
Home-page: https://github.com/ail-project/LacusCore
|
6
6
|
License: BSD-3-Clause
|
@@ -26,7 +26,7 @@ Provides-Extra: docs
|
|
26
26
|
Requires-Dist: Sphinx (<7.2) ; (python_version < "3.9") and (extra == "docs")
|
27
27
|
Requires-Dist: Sphinx (>=7.2,<8.0) ; (python_version >= "3.9") and (extra == "docs")
|
28
28
|
Requires-Dist: defang (>=0.5.3,<0.6.0)
|
29
|
-
Requires-Dist: playwrightcapture[recaptcha] (>=1.
|
29
|
+
Requires-Dist: playwrightcapture[recaptcha] (>=1.23.0,<2.0.0)
|
30
30
|
Requires-Dist: redis[hiredis] (>=5.0.1,<6.0.0)
|
31
31
|
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
32
32
|
Requires-Dist: ua-parser (>=0.18.0,<0.19.0)
|
@@ -27,13 +27,13 @@ from typing import Literal, Any, TypedDict, overload, cast, MutableMapping, Iter
|
|
27
27
|
from uuid import uuid4
|
28
28
|
from urllib.parse import urlsplit
|
29
29
|
|
30
|
-
from defang import refang # type: ignore
|
30
|
+
from defang import refang # type: ignore[import-untyped]
|
31
31
|
from playwrightcapture import Capture, PlaywrightCaptureException
|
32
32
|
from playwrightcapture.capture import CaptureResponse as PlaywrightCaptureResponse
|
33
33
|
from redis import Redis
|
34
34
|
from redis.exceptions import ConnectionError as RedisConnectionError
|
35
35
|
from redis.exceptions import DataError
|
36
|
-
from ua_parser import user_agent_parser # type: ignore
|
36
|
+
from ua_parser import user_agent_parser # type: ignore[import-untyped]
|
37
37
|
|
38
38
|
BROWSER = Literal['chromium', 'firefox', 'webkit']
|
39
39
|
|
@@ -81,7 +81,7 @@ class CaptureResponse(PlaywrightCaptureResponse, TypedDict, total=False):
|
|
81
81
|
'''A capture made by Lacus. With the base64 encoded image and downloaded file decoded to bytes.'''
|
82
82
|
|
83
83
|
# Need to make sure the type is what's expected down the line
|
84
|
-
children: list[CaptureResponse] | None # type: ignore
|
84
|
+
children: list[CaptureResponse] | None # type: ignore[misc]
|
85
85
|
|
86
86
|
status: int
|
87
87
|
runtime: float | None
|
@@ -125,9 +125,9 @@ class CaptureSettings(TypedDict, total=False):
|
|
125
125
|
viewport: dict[str, int] | None
|
126
126
|
referer: str | None
|
127
127
|
with_favicon: bool
|
128
|
-
force: bool
|
129
|
-
recapture_interval: int
|
130
|
-
priority: int
|
128
|
+
force: bool
|
129
|
+
recapture_interval: int
|
130
|
+
priority: int
|
131
131
|
uuid: str | None
|
132
132
|
|
133
133
|
depth: int
|
@@ -268,11 +268,11 @@ class LacusCore():
|
|
268
268
|
if 'url' in settings and settings['url'] is not None:
|
269
269
|
settings['url'] = settings['url'].strip()
|
270
270
|
if settings.get('force') is not None:
|
271
|
-
force = settings.pop('force')
|
271
|
+
force = settings.pop('force', False)
|
272
272
|
if settings.get('recapture_interval') is not None:
|
273
|
-
recapture_interval = settings.pop('recapture_interval')
|
273
|
+
recapture_interval = settings.pop('recapture_interval', 300)
|
274
274
|
if settings.get('priority') is not None:
|
275
|
-
priority = settings.pop('priority')
|
275
|
+
priority = settings.pop('priority', 0)
|
276
276
|
to_enqueue = settings
|
277
277
|
else:
|
278
278
|
to_enqueue = {'depth': depth, 'rendered_hostname_only': rendered_hostname_only}
|
@@ -331,17 +331,19 @@ class LacusCore():
|
|
331
331
|
|
332
332
|
mapping_capture: dict[str, bytes | float | int | str] = {}
|
333
333
|
for key, value in to_enqueue.items():
|
334
|
+
if value is None:
|
335
|
+
continue
|
334
336
|
if isinstance(value, bool):
|
335
337
|
mapping_capture[key] = 1 if value else 0
|
336
338
|
elif isinstance(value, (list, dict)):
|
337
339
|
if value:
|
338
340
|
mapping_capture[key] = json.dumps(value)
|
339
|
-
elif value
|
340
|
-
mapping_capture[key] = value
|
341
|
+
elif isinstance(value, (bytes, float, int, str)) and value not in ['', b'']: # we're ok with 0 for example
|
342
|
+
mapping_capture[key] = value
|
341
343
|
|
342
344
|
p = self.redis.pipeline()
|
343
345
|
p.set(f'lacus:query_hash:{hash_query}', perma_uuid, nx=True, ex=recapture_interval)
|
344
|
-
p.hset(f'lacus:capture_settings:{perma_uuid}', mapping=mapping_capture) # type: ignore
|
346
|
+
p.hset(f'lacus:capture_settings:{perma_uuid}', mapping=mapping_capture) # type: ignore[arg-type]
|
345
347
|
p.zadd('lacus:to_capture', {perma_uuid: priority if priority is not None else 0})
|
346
348
|
try:
|
347
349
|
p.execute()
|
@@ -405,6 +407,10 @@ class LacusCore():
|
|
405
407
|
return CaptureStatus.QUEUED
|
406
408
|
elif self.redis.zscore('lacus:ongoing', uuid) is not None:
|
407
409
|
return CaptureStatus.ONGOING
|
410
|
+
elif self.redis.exists(f'lacus:capture_settings:{uuid}'):
|
411
|
+
# we might have popped the UUID out of lacus:to_capture
|
412
|
+
# but not pushed it in lacus:ongoing yet
|
413
|
+
return CaptureStatus.QUEUED
|
408
414
|
elif self.redis.exists(f'lacus:capture_results_hash:{uuid}'):
|
409
415
|
return CaptureStatus.DONE
|
410
416
|
elif self.redis.exists(f'lacus:capture_results:{uuid}'):
|
@@ -437,7 +443,7 @@ class LacusCore():
|
|
437
443
|
:param priority: Only for internal use, will decide on the priority of the capture if the try now fails.
|
438
444
|
"""
|
439
445
|
if self.redis.zscore('lacus:ongoing', uuid) is not None:
|
440
|
-
# the capture is ongoing
|
446
|
+
# the capture is already ongoing
|
441
447
|
await asyncio.sleep(1)
|
442
448
|
return
|
443
449
|
|
@@ -464,22 +470,22 @@ class LacusCore():
|
|
464
470
|
if k in ['url', 'document_name', 'browser', 'device_name', 'user_agent',
|
465
471
|
'referer', 'timezone_id', 'locale', 'color_scheme']:
|
466
472
|
# string
|
467
|
-
to_capture[k] = v.decode() # type: ignore
|
473
|
+
to_capture[k] = v.decode() # type: ignore[literal-required]
|
468
474
|
elif k in ['cookies', 'http_credentials', 'viewport', 'geolocation']:
|
469
475
|
# dicts or list
|
470
|
-
to_capture[k] = json.loads(v) # type: ignore
|
476
|
+
to_capture[k] = json.loads(v) # type: ignore[literal-required]
|
471
477
|
elif k in ['proxy', 'headers']:
|
472
478
|
# can be dict or str
|
473
479
|
try:
|
474
|
-
to_capture[k] = json.loads(v) # type: ignore
|
480
|
+
to_capture[k] = json.loads(v) # type: ignore[literal-required]
|
475
481
|
except Exception:
|
476
|
-
to_capture[k] = v.decode() # type: ignore
|
482
|
+
to_capture[k] = v.decode() # type: ignore[literal-required]
|
477
483
|
elif k in ['general_timeout_in_sec', 'depth']:
|
478
484
|
# int
|
479
|
-
to_capture[k] = int(v) # type: ignore
|
485
|
+
to_capture[k] = int(v) # type: ignore[literal-required]
|
480
486
|
elif k in ['rendered_hostname_only', 'with_favicon']:
|
481
487
|
# bool
|
482
|
-
to_capture[k] = bool(int(v)) # type: ignore
|
488
|
+
to_capture[k] = bool(int(v)) # type: ignore[literal-required]
|
483
489
|
elif k == 'document':
|
484
490
|
document_as_bytes = b64decode(v)
|
485
491
|
else:
|
@@ -577,15 +583,15 @@ class LacusCore():
|
|
577
583
|
proxy=proxy,
|
578
584
|
general_timeout_in_sec=general_timeout) as capture:
|
579
585
|
# required by Mypy: https://github.com/python/mypy/issues/3004
|
580
|
-
capture.headers = to_capture.get('headers') # type: ignore
|
581
|
-
capture.cookies = to_capture.get('cookies') # type: ignore
|
582
|
-
capture.viewport = to_capture.get('viewport') # type: ignore
|
583
|
-
capture.user_agent = to_capture.get('user_agent') # type: ignore
|
584
|
-
capture.http_credentials = to_capture.get('http_credentials') # type: ignore
|
585
|
-
capture.geolocation = to_capture.get('geolocation') # type: ignore
|
586
|
-
capture.timezone_id = to_capture.get('timezone_id') # type: ignore
|
587
|
-
capture.locale = to_capture.get('locale') # type: ignore
|
588
|
-
capture.color_scheme = to_capture.get('color_scheme') # type: ignore
|
586
|
+
capture.headers = to_capture.get('headers') # type: ignore[assignment]
|
587
|
+
capture.cookies = to_capture.get('cookies') # type: ignore[assignment]
|
588
|
+
capture.viewport = to_capture.get('viewport') # type: ignore[assignment]
|
589
|
+
capture.user_agent = to_capture.get('user_agent') # type: ignore[assignment]
|
590
|
+
capture.http_credentials = to_capture.get('http_credentials') # type: ignore[assignment]
|
591
|
+
capture.geolocation = to_capture.get('geolocation') # type: ignore[assignment]
|
592
|
+
capture.timezone_id = to_capture.get('timezone_id') # type: ignore[assignment]
|
593
|
+
capture.locale = to_capture.get('locale') # type: ignore[assignment]
|
594
|
+
capture.color_scheme = to_capture.get('color_scheme') # type: ignore[assignment]
|
589
595
|
try:
|
590
596
|
await asyncio.wait_for(capture.initialize_context(), timeout=general_timeout)
|
591
597
|
except (TimeoutError, asyncio.exceptions.TimeoutError):
|
@@ -748,10 +754,10 @@ class LacusCore():
|
|
748
754
|
if key in ['har', 'cookies', 'potential_favicons', 'html', 'children'] or not results.get(key):
|
749
755
|
continue
|
750
756
|
# these entries can be stored directly
|
751
|
-
hash_to_set[key] = results[key] # type: ignore
|
757
|
+
hash_to_set[key] = results[key] # type: ignore[literal-required]
|
752
758
|
|
753
759
|
if hash_to_set:
|
754
|
-
pipeline.hset(root_key, mapping=hash_to_set) # type: ignore
|
760
|
+
pipeline.hset(root_key, mapping=hash_to_set) # type: ignore[arg-type]
|
755
761
|
# Make sure the key expires
|
756
762
|
pipeline.expire(root_key, 36000)
|
757
763
|
else:
|
@@ -782,19 +788,19 @@ class LacusCore():
|
|
782
788
|
to_return['children'] = []
|
783
789
|
for child_root_key in sorted(pickle.loads(value)):
|
784
790
|
if child := self._get_capture_response(capture_uuid, child_root_key):
|
785
|
-
to_return['children'].append(child) # type: ignore
|
791
|
+
to_return['children'].append(child) # type: ignore[union-attr]
|
786
792
|
elif key in [b'status']:
|
787
793
|
# The value is an int
|
788
|
-
to_return[key.decode()] = int(value) # type: ignore
|
794
|
+
to_return[key.decode()] = int(value) # type: ignore[literal-required]
|
789
795
|
elif key in [b'runtime']:
|
790
796
|
# The value is a float
|
791
|
-
to_return[key.decode()] = float(value) # type: ignore
|
797
|
+
to_return[key.decode()] = float(value) # type: ignore[literal-required]
|
792
798
|
elif key in [b'last_redirected_url', b'error', b'error_name', b'html', b'downloaded_filename']:
|
793
799
|
# the value is a string
|
794
|
-
to_return[key.decode()] = value.decode() # type: ignore
|
800
|
+
to_return[key.decode()] = value.decode() # type: ignore[literal-required]
|
795
801
|
elif key in [b'png', b'downloaded_file']:
|
796
802
|
# the value is bytes
|
797
|
-
to_return[key.decode()] = value # type: ignore
|
803
|
+
to_return[key.decode()] = value # type: ignore[literal-required]
|
798
804
|
else:
|
799
805
|
logger.critical(f'Unexpected key in response: {key} - {value}')
|
800
806
|
return to_return
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "lacuscore"
|
3
|
-
version = "1.7.10"
|
3
|
+
version = "1.8.0"
|
4
4
|
description = "Core of Lacus, usable as a module"
|
5
5
|
authors = ["Raphaël Vinot <raphael.vinot@circl.lu>"]
|
6
6
|
license = "BSD-3-Clause"
|
@@ -22,6 +22,7 @@ classifiers = [
|
|
22
22
|
'Programming Language :: Python :: 3.9',
|
23
23
|
'Programming Language :: Python :: 3.10',
|
24
24
|
'Programming Language :: Python :: 3.11',
|
25
|
+
'Programming Language :: Python :: 3.12',
|
25
26
|
'Topic :: Security',
|
26
27
|
'Topic :: Internet',
|
27
28
|
]
|
@@ -33,7 +34,7 @@ Sphinx = [
|
|
33
34
|
{version = "<7.2", python = "<3.9", optional = true},
|
34
35
|
{version = "^7.2", python = ">=3.9", optional = true}
|
35
36
|
]
|
36
|
-
playwrightcapture = {extras = ["recaptcha"], version = "^1.
|
37
|
+
playwrightcapture = {extras = ["recaptcha"], version = "^1.23.0"}
|
37
38
|
defang = "^0.5.3"
|
38
39
|
ua-parser = "^0.18.0"
|
39
40
|
redis = {version = "^5.0.1", extras = ["hiredis"]}
|
@@ -44,14 +45,14 @@ docs = ["Sphinx"]
|
|
44
45
|
[tool.poetry.group.dev.dependencies]
|
45
46
|
types-redis = {version = "^4.6.0.20240106"}
|
46
47
|
mypy = "^1.8.0"
|
47
|
-
types-requests = "^2.31.0.
|
48
|
+
types-requests = "^2.31.0.20240125"
|
48
49
|
types-beautifulsoup4 = "^4.12.0.20240106"
|
49
50
|
ipython = [
|
50
51
|
{version = "<8.13.0", python = "<3.9"},
|
51
52
|
{version = "^8.18.0", python = ">=3.9"},
|
52
53
|
{version = "^8.19.0", python = ">=3.10"}
|
53
54
|
]
|
54
|
-
pytest = "^
|
55
|
+
pytest = "^8.0.0"
|
55
56
|
|
56
57
|
[build-system]
|
57
58
|
requires = ["poetry_core"]
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|