lacuscore 1.14.0__tar.gz → 1.14.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lacuscore-1.14.0 → lacuscore-1.14.2}/PKG-INFO +4 -4
- {lacuscore-1.14.0 → lacuscore-1.14.2}/lacuscore/lacuscore.py +32 -8
- {lacuscore-1.14.0 → lacuscore-1.14.2}/pyproject.toml +6 -6
- {lacuscore-1.14.0 → lacuscore-1.14.2}/LICENSE +0 -0
- {lacuscore-1.14.0 → lacuscore-1.14.2}/README.md +0 -0
- {lacuscore-1.14.0 → lacuscore-1.14.2}/lacuscore/__init__.py +0 -0
- {lacuscore-1.14.0 → lacuscore-1.14.2}/lacuscore/helpers.py +0 -0
- {lacuscore-1.14.0 → lacuscore-1.14.2}/lacuscore/lacus_monitoring.py +0 -0
- {lacuscore-1.14.0 → lacuscore-1.14.2}/lacuscore/py.typed +0 -0
- {lacuscore-1.14.0 → lacuscore-1.14.2}/lacuscore/task_logger.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: lacuscore
|
3
|
-
Version: 1.14.0
|
3
|
+
Version: 1.14.2
|
4
4
|
Summary: Core of Lacus, usable as a module
|
5
5
|
License: BSD-3-Clause
|
6
6
|
Author: Raphaël Vinot
|
@@ -26,9 +26,9 @@ Requires-Dist: async-timeout (>=5.0.1) ; python_version < "3.11"
|
|
26
26
|
Requires-Dist: defang (>=0.5.3)
|
27
27
|
Requires-Dist: dnspython (>=2.7.0)
|
28
28
|
Requires-Dist: eval-type-backport (>=0.2.2) ; python_version < "3.10"
|
29
|
-
Requires-Dist: playwrightcapture[recaptcha] (>=1.29.
|
30
|
-
Requires-Dist: pydantic (>=2.11.
|
31
|
-
Requires-Dist: redis[hiredis] (>=5.
|
29
|
+
Requires-Dist: playwrightcapture[recaptcha] (>=1.29.3)
|
30
|
+
Requires-Dist: pydantic (>=2.11.4)
|
31
|
+
Requires-Dist: redis[hiredis] (>=5.3.0,<6.0.0)
|
32
32
|
Requires-Dist: requests (>=2.32.3)
|
33
33
|
Requires-Dist: typing-extensions (>=4.12.2,<5.0.0) ; python_version < "3.12"
|
34
34
|
Requires-Dist: ua-parser[regex] (>=1.0.1)
|
@@ -269,6 +269,7 @@ class LacusCore():
|
|
269
269
|
p = self.redis.pipeline()
|
270
270
|
p.set(f'lacus:query_hash:{hash_query}', perma_uuid, nx=True, ex=recapture_interval)
|
271
271
|
p.hset(f'lacus:capture_settings:{perma_uuid}', mapping=to_enqueue.redis_dump())
|
272
|
+
p.expire(f'lacus:capture_settings:{perma_uuid}', self.max_capture_time * 2)
|
272
273
|
p.zadd('lacus:to_capture', {perma_uuid: priority if priority is not None else 0})
|
273
274
|
try:
|
274
275
|
p.execute()
|
@@ -330,15 +331,29 @@ class LacusCore():
|
|
330
331
|
"""
|
331
332
|
if self.redis.zscore('lacus:to_capture', uuid) is not None:
|
332
333
|
return CaptureStatus.QUEUED
|
333
|
-
|
334
|
+
if self.redis.zscore('lacus:ongoing', uuid) is not None:
|
334
335
|
return CaptureStatus.ONGOING
|
335
|
-
|
336
|
-
# we might have
|
337
|
-
#
|
338
|
-
|
339
|
-
|
336
|
+
if self.redis.exists(f'lacus:capture_settings:{uuid}'):
|
337
|
+
# we might have a race condition between when the UUID is popped out of lacus:to_capture,
|
338
|
+
# and pushed in lacus:ongoing.
|
339
|
+
# if that's the case, we wait for a sec and check lacus:ongoing again
|
340
|
+
# If it's still not in ongoing, the UUID is broken and can be considered unknown.
|
341
|
+
# This key is removed anyway once the capture is done.
|
342
|
+
max_checks = 10
|
343
|
+
for i in range(max_checks):
|
344
|
+
time.sleep(.1)
|
345
|
+
if self.redis.zscore('lacus:to_capture', uuid) is not None:
|
346
|
+
# Could be re-added in that queue if the capture failed, but will be retried
|
347
|
+
return CaptureStatus.QUEUED
|
348
|
+
if self.redis.zscore('lacus:ongoing', uuid) is not None:
|
349
|
+
# The capture is actually ongoing now
|
350
|
+
return CaptureStatus.ONGOING
|
351
|
+
# The UUID is still not anywhere to be found, it's broken.
|
352
|
+
self.redis.delete(f'lacus:capture_settings:{uuid}')
|
353
|
+
return CaptureStatus.UNKNOWN
|
354
|
+
if self.redis.exists(f'lacus:capture_results_hash:{uuid}'):
|
340
355
|
return CaptureStatus.DONE
|
341
|
-
|
356
|
+
if self.redis.exists(f'lacus:capture_results:{uuid}'):
|
342
357
|
# TODO: remove in 1.8.* - old format used last in 1.6, and kept no more than 10H in redis
|
343
358
|
return CaptureStatus.DONE
|
344
359
|
return CaptureStatus.UNKNOWN
|
@@ -622,6 +637,11 @@ class LacusCore():
|
|
622
637
|
# NOTE: in this block, we absolutely have to make sure the UUID is removed
|
623
638
|
# from the lacus:ongoing sorted set (it is definitely not ongoing anymore)
|
624
639
|
# and optionally re-added to lacus:to_capture if we want to retry it
|
640
|
+
#
|
641
|
+
# In order to have a consistent capture status, the capture UUID must either be in
|
642
|
+
# lacus:ongoing (while ongoing), in lacus:to_capture (on retry), or the result stored (on success).
|
643
|
+
# If the capture fails to be stored in valkey, we must also remove the capture settings
|
644
|
+
# so it is not dangling there.
|
625
645
|
|
626
646
|
if to_capture.document:
|
627
647
|
os.unlink(tmp_f.name)
|
@@ -654,7 +674,11 @@ class LacusCore():
|
|
654
674
|
retry_redis_error -= 1
|
655
675
|
await asyncio.sleep(random.randint(5, 10))
|
656
676
|
else:
|
657
|
-
|
677
|
+
# Unrecoverable redis error, remove the capture settings
|
678
|
+
p = self.redis.pipeline()
|
679
|
+
p.delete(f'lacus:capture_settings:{uuid}')
|
680
|
+
p.zrem('lacus:ongoing', uuid)
|
681
|
+
p.execute()
|
658
682
|
stats_pipeline.zincrby(f'stats:{today}:errors', 1, 'Redis Connection')
|
659
683
|
logger.critical('Unable to connect to redis and to push the result of the capture.')
|
660
684
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name = "lacuscore"
|
3
|
-
version = "1.14.0"
|
3
|
+
version = "1.14.2"
|
4
4
|
description = "Core of Lacus, usable as a module"
|
5
5
|
authors = [
|
6
6
|
{name="Raphaël Vinot", email="raphael.vinot@circl.lu"}
|
@@ -15,13 +15,13 @@ dynamic = [ "classifiers" ]
|
|
15
15
|
|
16
16
|
dependencies = [
|
17
17
|
"requests (>=2.32.3)",
|
18
|
-
"playwrightcapture[recaptcha] (>=1.29.
|
18
|
+
"playwrightcapture[recaptcha] (>=1.29.3)",
|
19
19
|
"defang (>=0.5.3)",
|
20
20
|
"ua-parser[regex] (>=1.0.1)",
|
21
|
-
"redis
|
21
|
+
"redis[hiredis] (>=5.3.0,<6.0.0)",
|
22
22
|
"dnspython (>=2.7.0)",
|
23
23
|
"async-timeout (>=5.0.1) ; python_version < \"3.11\"",
|
24
|
-
"pydantic (>=2.11.
|
24
|
+
"pydantic (>=2.11.4)",
|
25
25
|
"eval-type-backport (>=0.2.2) ; python_version < \"3.10\"",
|
26
26
|
"typing-extensions (>=4.12.2,<5.0.0) ; python_version < \"3.12\""
|
27
27
|
]
|
@@ -49,8 +49,8 @@ docs = ["Sphinx (>=8.2.3) ; python_version >= \"3.11\""]
|
|
49
49
|
[tool.poetry.group.dev.dependencies]
|
50
50
|
mypy = "^1.15.0"
|
51
51
|
types-redis = {version = "^4.6.0.20241004"}
|
52
|
-
types-requests = "^2.32.0.
|
53
|
-
types-beautifulsoup4 = "^4.12.0.
|
52
|
+
types-requests = "^2.32.0.20250515"
|
53
|
+
types-beautifulsoup4 = "^4.12.0.20250516"
|
54
54
|
pytest = "^8.3.5"
|
55
55
|
|
56
56
|
[build-system]
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|