lacuscore 1.6.6__py3-none-any.whl → 1.6.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lacuscore/lacuscore.py +87 -19
- {lacuscore-1.6.6.dist-info → lacuscore-1.6.7.dist-info}/METADATA +2 -2
- {lacuscore-1.6.6.dist-info → lacuscore-1.6.7.dist-info}/RECORD +5 -5
- {lacuscore-1.6.6.dist-info → lacuscore-1.6.7.dist-info}/LICENSE +0 -0
- {lacuscore-1.6.6.dist-info → lacuscore-1.6.7.dist-info}/WHEEL +0 -0
lacuscore/lacuscore.py
CHANGED
@@ -10,7 +10,6 @@ import pickle
|
|
10
10
|
import random
|
11
11
|
import re
|
12
12
|
import socket
|
13
|
-
import sys
|
14
13
|
import time
|
15
14
|
import unicodedata
|
16
15
|
import zlib
|
@@ -78,6 +77,9 @@ class CaptureStatus(IntEnum):
|
|
78
77
|
class CaptureResponse(PlaywrightCaptureResponse, TypedDict, total=False):
|
79
78
|
'''A capture made by Lacus. With the base64 encoded image and downloaded file decoded to bytes.'''
|
80
79
|
|
80
|
+
# Need to make sure the type is what's expected down the line
|
81
|
+
children: Optional[List['CaptureResponse']] # type: ignore
|
82
|
+
|
81
83
|
status: int
|
82
84
|
runtime: Optional[float]
|
83
85
|
|
@@ -94,7 +96,7 @@ class CaptureResponseJson(TypedDict, total=False):
|
|
94
96
|
png: Optional[str]
|
95
97
|
downloaded_filename: Optional[str]
|
96
98
|
downloaded_file: Optional[str]
|
97
|
-
children: Optional[List[
|
99
|
+
children: Optional[List['CaptureResponseJson']]
|
98
100
|
runtime: Optional[float]
|
99
101
|
potential_favicons: Optional[List[str]]
|
100
102
|
|
@@ -139,11 +141,6 @@ class LacusCoreLogAdapter(LoggerAdapter):
|
|
139
141
|
return msg, kwargs
|
140
142
|
|
141
143
|
|
142
|
-
def _json_encode(obj: Union[bytes]) -> str:
|
143
|
-
if isinstance(obj, bytes):
|
144
|
-
return b64encode(obj).decode()
|
145
|
-
|
146
|
-
|
147
144
|
class LacusCore():
|
148
145
|
"""Capture URLs or web enabled documents using PlaywrightCapture.
|
149
146
|
|
@@ -168,7 +165,7 @@ class LacusCore():
|
|
168
165
|
self.only_global_lookups = only_global_lookups
|
169
166
|
self.max_retries = max_retries
|
170
167
|
|
171
|
-
# NOTE: clear old ongoing captures queue in case of need
|
168
|
+
# NOTE: Remove in 1.8.* - clear old ongoing captures queue in case of need
|
172
169
|
if self.redis.type('lacus:ongoing') in ['set', b'set']:
|
173
170
|
self.redis.delete('lacus:ongoing')
|
174
171
|
|
@@ -353,8 +350,7 @@ class LacusCore():
|
|
353
350
|
if capture.get('downloaded_file') is not None and capture['downloaded_file'] is not None: # the second part is not needed, but makes mypy happy
|
354
351
|
encoded_capture['downloaded_file'] = b64encode(capture['downloaded_file']).decode()
|
355
352
|
if capture.get('children') and capture['children']:
|
356
|
-
for child in capture['children']
|
357
|
-
child = self._encode_response(child)
|
353
|
+
encoded_capture['children'] = [self._encode_response(child) for child in capture['children']]
|
358
354
|
|
359
355
|
# A set cannot be dumped in json, it must be turned into a list. If it is empty, we need to remove it.
|
360
356
|
if 'potential_favicons' in capture:
|
@@ -383,10 +379,9 @@ class LacusCore():
|
|
383
379
|
to_return['status'] = CaptureStatus.QUEUED
|
384
380
|
elif self.redis.zscore('lacus:ongoing', uuid) is not None:
|
385
381
|
to_return['status'] = CaptureStatus.ONGOING
|
386
|
-
elif response := self.
|
382
|
+
elif response := self._get_capture_response(uuid):
|
387
383
|
to_return['status'] = CaptureStatus.DONE
|
388
|
-
|
389
|
-
to_return.update(response_json)
|
384
|
+
to_return.update(response)
|
390
385
|
if decode:
|
391
386
|
return to_return
|
392
387
|
return self._encode_response(to_return)
|
@@ -403,7 +398,10 @@ class LacusCore():
|
|
403
398
|
return CaptureStatus.QUEUED
|
404
399
|
elif self.redis.zscore('lacus:ongoing', uuid) is not None:
|
405
400
|
return CaptureStatus.ONGOING
|
401
|
+
elif self.redis.exists(f'lacus:capture_results_hash:{uuid}'):
|
402
|
+
return CaptureStatus.DONE
|
406
403
|
elif self.redis.exists(f'lacus:capture_results:{uuid}'):
|
404
|
+
# TODO: remove in 1.8.* - old format used last in 1.6, and kept no more than 10H in redis
|
407
405
|
return CaptureStatus.DONE
|
408
406
|
return CaptureStatus.UNKNOWN
|
409
407
|
|
@@ -677,18 +675,17 @@ class LacusCore():
|
|
677
675
|
p.zadd('lacus:to_capture', {uuid: priority - 1})
|
678
676
|
p.execute()
|
679
677
|
else:
|
680
|
-
to_store = zlib.compress(pickle.dumps(result))
|
681
678
|
retry_redis_error = 3
|
682
679
|
while retry_redis_error > 0:
|
683
680
|
try:
|
684
681
|
p = self.redis.pipeline()
|
685
|
-
|
682
|
+
self._store_capture_response(p, uuid, result)
|
686
683
|
p.delete(f'lacus:capture_settings:{uuid}')
|
687
684
|
p.zrem('lacus:ongoing', uuid)
|
688
685
|
p.execute()
|
689
686
|
break
|
690
687
|
except RedisConnectionError as e:
|
691
|
-
logger.warning(f'Unable to store capture result
|
688
|
+
logger.warning(f'Unable to store capture result - Redis Connection Error: {e}')
|
692
689
|
retry_redis_error -= 1
|
693
690
|
await asyncio.sleep(random.randint(5, 10))
|
694
691
|
else:
|
@@ -704,6 +701,78 @@ class LacusCore():
|
|
704
701
|
stats_pipeline.expire(f'stats:{today}:captures', expire_time)
|
705
702
|
stats_pipeline.execute()
|
706
703
|
|
704
|
+
def _store_capture_response(self, pipeline: Redis, capture_uuid: str, results: CaptureResponse,
                            root_key: Optional[str]=None) -> None:
    '''Queue on `pipeline` the commands storing a capture response as Redis hash(es).

    The response is stored in the hash `root_key`; every child capture is stored
    in its own hash (`<root_key>_<zero padded index>`), and the parent hash keeps
    the pickled set of the child keys. Every hash written expires after 36000s.
    Nothing is sent to Redis here: the caller must execute the pipeline.

    :param pipeline: The pipeline the commands are queued on.
    :param capture_uuid: UUID of the capture, used to build the default root key.
    :param results: The capture response to store.
    :param root_key: Key of the hash to write, defaults to
                     `lacus:capture_results_hash:<capture_uuid>`.
    '''
    if root_key is None:
        root_key = f'lacus:capture_results_hash:{capture_uuid}'

    if not self._queue_capture_response_hash(pipeline, results, root_key):
        # Without any entry, no key is created: the capture will not be found later on.
        logger = LacusCoreLogAdapter(self.master_logger, {'uuid': capture_uuid})
        logger.warning(f'Nothing to store for {root_key}: the capture response is empty.')

def _queue_capture_response_hash(self, pipeline: Redis, results: CaptureResponse, root_key: str) -> bool:
    '''Queue the hash for `results` (and recursively its children) under `root_key`.

    Returns True if a hash was queued, False if there was nothing to store.
    '''
    # Structured entries must be pickled, the other ones can be stored directly.
    pickled_entries = ('har', 'cookies', 'potential_favicons')

    hash_to_set = {}
    for key, value in results.items():
        # Children are handled below. Empty values (None, '', empty containers, 0)
        # are not stored: Redis cannot store None, and the entry is optional anyway.
        if key == 'children' or not value:
            continue
        hash_to_set[key] = pickle.dumps(value) if key in pickled_entries else value

    children = results.get('children')
    if children is not None:
        # Zero padded index so the lexicographic order of the keys is the order of the children.
        padding_length = len(str(len(children)))
        child_keys = set()
        for i, child in enumerate(children):
            child_key = f'{root_key}_{i:0{padding_length}}'
            # Only reference the children that are actually stored,
            # a dangling key would be loaded as an empty child.
            if self._queue_capture_response_hash(pipeline, child, child_key):
                child_keys.add(child_key)
        hash_to_set['children'] = pickle.dumps(child_keys)

    if not hash_to_set:
        # redis-py refuses a hset call without any key/value pair (DataError),
        # which would abort the storage of the whole capture.
        return False

    pipeline.hset(root_key, mapping=hash_to_set)  # type: ignore
    # Make sure the key expires
    pipeline.expire(root_key, 36000)
    return True
|
733
|
+
|
734
|
+
def _get_capture_response(self, capture_uuid: str, root_key: Optional[str]=None) -> Optional[CaptureResponse]:
    '''Load a capture response from the Redis hash(es) it is stored in.

    :param capture_uuid: UUID of the capture.
    :param root_key: Key of the hash to load, defaults to
                     `lacus:capture_results_hash:<capture_uuid>`. The other keys
                     are the ones of the children, loaded recursively.
    :return: The capture response, or None if nothing is stored under that key.
    '''
    logger = LacusCoreLogAdapter(self.master_logger, {'uuid': capture_uuid})
    default_root_key = f'lacus:capture_results_hash:{capture_uuid}'
    if root_key is None:
        root_key = default_root_key

    # A hash cannot exist and be empty: one call is enough to check the key
    # and get the content, and the key cannot expire between the two.
    stored = self.redis.hgetall(root_key)
    if not stored:
        if root_key != default_root_key:
            # The old format stores the whole capture (children included) in one key,
            # it must never be returned in place of a missing child.
            return None
        if old_response := self.redis.get(f'lacus:capture_results:{capture_uuid}'):
            # TODO: remove in 1.8.* - old format used last in 1.6, and kept no more than 10H in redis
            return pickle.loads(zlib.decompress(old_response))
        return None

    # New format and capture done
    # NOTE: pickle.loads is only safe because the content was written by
    #       _store_capture_response, the redis instance must be trusted.
    to_return: CaptureResponse = {}
    for key, value in stored.items():
        if key in (b'har', b'cookies', b'potential_favicons'):
            # the value is a pickled object
            to_return[key.decode()] = pickle.loads(value)  # type: ignore
        elif key == b'children':
            children = []
            # The keys end with a zero padded index: sorting them restores the order of the children.
            for child_root_key in sorted(pickle.loads(value)):
                child = self._get_capture_response(capture_uuid, child_root_key)
                if child is None:
                    logger.warning(f'Unable to find child capture: {child_root_key}')
                    continue
                children.append(child)
            to_return['children'] = children
        elif key == b'status':
            # The value is an int
            to_return[key.decode()] = int(value)  # type: ignore
        elif key == b'runtime':
            # The value is a float
            to_return[key.decode()] = float(value)  # type: ignore
        elif key in (b'last_redirected_url', b'error', b'error_name', b'html', b'downloaded_filename'):
            # the value is a string
            to_return[key.decode()] = value.decode()  # type: ignore
        elif key in (b'png', b'downloaded_file'):
            # the value is bytes
            to_return[key.decode()] = value  # type: ignore
        else:
            logger.critical(f'Unexpected key in response: {key} - {value}')
    return to_return
|
775
|
+
|
707
776
|
def clear_capture(self, uuid: str, reason: str):
|
708
777
|
'''Remove a capture from the list, shouldn't happen unless it is in error'''
|
709
778
|
logger = LacusCoreLogAdapter(self.master_logger, {'uuid': uuid})
|
@@ -711,10 +780,9 @@ class LacusCore():
|
|
711
780
|
logger.warning('Attempted to clear capture that is still being processed.')
|
712
781
|
return
|
713
782
|
logger.warning(f'Clearing capture: {reason}')
|
714
|
-
result = {'error': reason}
|
783
|
+
result: CaptureResponse = {'error': reason}
|
715
784
|
p = self.redis.pipeline()
|
716
|
-
|
717
|
-
p.setex(f'lacus:capture_results:{uuid}', 36000, to_store)
|
785
|
+
self._store_capture_response(p, uuid, result)
|
718
786
|
p.delete(f'lacus:capture_settings:{uuid}')
|
719
787
|
p.zrem('lacus:ongoing', uuid)
|
720
788
|
p.execute()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lacuscore
|
3
|
-
Version: 1.6.6
|
3
|
+
Version: 1.6.7
|
4
4
|
Summary: Core of Lacus, usable as a module
|
5
5
|
Home-page: https://github.com/ail-project/LacusCore
|
6
6
|
License: BSD-3-Clause
|
@@ -24,7 +24,7 @@ Classifier: Topic :: Security
|
|
24
24
|
Provides-Extra: docs
|
25
25
|
Requires-Dist: Sphinx (>=7.1.2,<8.0.0) ; extra == "docs"
|
26
26
|
Requires-Dist: defang (>=0.5.3,<0.6.0)
|
27
|
-
Requires-Dist: playwrightcapture[recaptcha] (>=1.21.
|
27
|
+
Requires-Dist: playwrightcapture[recaptcha] (>=1.21.6,<2.0.0)
|
28
28
|
Requires-Dist: redis[hiredis] (>=4.6.0,<5.0.0)
|
29
29
|
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
30
30
|
Requires-Dist: ua-parser (>=0.18.0,<0.19.0)
|
@@ -1,9 +1,9 @@
|
|
1
1
|
README.md,sha256=NVr2b3eX2dwOO917TtyGGhCTLtmFbLCFPy9MH8JPUMU,941
|
2
2
|
lacuscore/__init__.py,sha256=ytBrQRBXO1Q5yV72qyS16Q7Auqebl3EMhhLQUa0Sg4g,169
|
3
3
|
lacuscore/lacus_monitoring.py,sha256=_mhKcfNQxnpiDvZcOEy5HvlNQQtVQy1SrfgMkLaCiTI,2138
|
4
|
-
lacuscore/lacuscore.py,sha256=
|
4
|
+
lacuscore/lacuscore.py,sha256=0nIGKbMJwM1L9QtuYngTCmI6_hNgIRjNsPFIkRQyqCU,38485
|
5
5
|
lacuscore/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
lacuscore-1.6.6.dist-info/LICENSE,sha256=4C4hLYrIkUD96Ggk-y_Go1Qf7PBZrEm9PSeTGe2nd4s,1516
|
7
|
-
lacuscore-1.6.
|
8
|
-
lacuscore-1.6.6.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
9
|
-
lacuscore-1.6.6.dist-info/RECORD,,
|
6
|
+
lacuscore-1.6.7.dist-info/LICENSE,sha256=4C4hLYrIkUD96Ggk-y_Go1Qf7PBZrEm9PSeTGe2nd4s,1516
|
7
|
+
lacuscore-1.6.7.dist-info/METADATA,sha256=rlPKFSrdzr6QGXhvs63pEkYaqPEkVvtPdhdWw-ayZPI,2357
|
8
|
+
lacuscore-1.6.7.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
9
|
+
lacuscore-1.6.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|