lacuscore 1.14.1__tar.gz → 1.14.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lacuscore
3
- Version: 1.14.1
3
+ Version: 1.14.3
4
4
  Summary: Core of Lacus, usable as a module
5
5
  License: BSD-3-Clause
6
6
  Author: Raphaël Vinot
@@ -26,7 +26,7 @@ Requires-Dist: async-timeout (>=5.0.1) ; python_version < "3.11"
26
26
  Requires-Dist: defang (>=0.5.3)
27
27
  Requires-Dist: dnspython (>=2.7.0)
28
28
  Requires-Dist: eval-type-backport (>=0.2.2) ; python_version < "3.10"
29
- Requires-Dist: playwrightcapture[recaptcha] (>=1.29.2)
29
+ Requires-Dist: playwrightcapture[recaptcha] (>=1.29.3)
30
30
  Requires-Dist: pydantic (>=2.11.4)
31
31
  Requires-Dist: redis[hiredis] (>=5.3.0,<6.0.0)
32
32
  Requires-Dist: requests (>=2.32.3)
@@ -19,7 +19,7 @@ from datetime import date, timedelta
19
19
  from ipaddress import ip_address, IPv4Address, IPv6Address
20
20
  from tempfile import NamedTemporaryFile
21
21
  from typing import Literal, Any, overload, cast, TYPE_CHECKING
22
- from collections.abc import Iterator
22
+ from collections.abc import AsyncIterator
23
23
  from uuid import uuid4
24
24
  from urllib.parse import urlsplit
25
25
 
@@ -331,20 +331,34 @@ class LacusCore():
331
331
  """
332
332
  if self.redis.zscore('lacus:to_capture', uuid) is not None:
333
333
  return CaptureStatus.QUEUED
334
- elif self.redis.zscore('lacus:ongoing', uuid) is not None:
334
+ if self.redis.zscore('lacus:ongoing', uuid) is not None:
335
335
  return CaptureStatus.ONGOING
336
- elif self.redis.exists(f'lacus:capture_settings:{uuid}'):
337
- # we might have popped the UUID out of lacus:to_capture
338
- # but not pused it in lacus:ongoing yet
339
- return CaptureStatus.QUEUED
340
- elif self.redis.exists(f'lacus:capture_results_hash:{uuid}'):
336
+ if self.redis.exists(f'lacus:capture_settings:{uuid}'):
337
+ # we might have a race condition between when the UUID is popped out of lacus:to_capture,
338
+ # and pushed in lacus:ongoing.
339
+ # if that's the case, we wait for a sec and check lacus:ongoing again
340
+ # If it's still not in ongoing, the UUID is broken and can be considered unknown.
341
+ # This key is removed anyway once the capture is done.
342
+ max_checks = 10
343
+ for i in range(max_checks):
344
+ time.sleep(.1)
345
+ if self.redis.zscore('lacus:to_capture', uuid) is not None:
346
+ # Could be re-added in that queue if the capture failed, but will be retried
347
+ return CaptureStatus.QUEUED
348
+ if self.redis.zscore('lacus:ongoing', uuid) is not None:
349
+ # The capture is actually ongoing now
350
+ return CaptureStatus.ONGOING
351
+ # The UUID is still nowhere to be found, it's broken.
352
+ self.redis.delete(f'lacus:capture_settings:{uuid}')
353
+ return CaptureStatus.UNKNOWN
354
+ if self.redis.exists(f'lacus:capture_results_hash:{uuid}'):
341
355
  return CaptureStatus.DONE
342
- elif self.redis.exists(f'lacus:capture_results:{uuid}'):
356
+ if self.redis.exists(f'lacus:capture_results:{uuid}'):
343
357
  # TODO: remove in 1.8.* - old format used last in 1.6, and kept no more than 10H in redis
344
358
  return CaptureStatus.DONE
345
359
  return CaptureStatus.UNKNOWN
346
360
 
347
- def consume_queue(self, max_consume: int) -> Iterator[Task]: # type: ignore[type-arg]
361
+ async def consume_queue(self, max_consume: int) -> AsyncIterator[Task[None]]:
348
362
  """Trigger the capture for captures with the highest priority. Up to max_consume.
349
363
 
350
364
  :yield: Captures.
@@ -364,6 +378,8 @@ class LacusCore():
364
378
  yield task_logger.create_task(self._capture(uuid, priority), name=uuid,
365
379
  logger=logger,
366
380
  message='Capture raised an uncaught exception')
381
+ # Make sure the task starts.
382
+ await asyncio.sleep(0.5)
367
383
 
368
384
  async def _capture(self, uuid: str, priority: int) -> None:
369
385
  """Trigger a specific capture
@@ -623,9 +639,18 @@ class LacusCore():
623
639
  # NOTE: in this block, we absolutely have to make sure the UUID is removed
624
640
  # from the lacus:ongoing sorted set (it is definitely not ongoing anymore)
625
641
  # and optionally re-added to lacus:to_capture if we want to retry it
642
+ #
643
+ # In order to have a consistent capture status, the capture UUID must either be in
644
+ # lacus:ongoing (while ongoing), in lacus:to_capture (on retry), or the result stored (on success).
645
+ # If the capture fails to be stored in valkey, we must also remove the capture settings
646
+ # so it is not dangling there.
626
647
 
627
- if to_capture.document:
628
- os.unlink(tmp_f.name)
648
+ try:
649
+ if to_capture.document:
650
+ os.unlink(tmp_f.name)
651
+ except UnboundLocalError:
652
+ # Missing settings, the capture failed.
653
+ pass
629
654
 
630
655
  if retry:
631
656
  if self.redis.zcard('lacus:to_capture') == 0:
@@ -655,7 +680,11 @@ class LacusCore():
655
680
  retry_redis_error -= 1
656
681
  await asyncio.sleep(random.randint(5, 10))
657
682
  else:
658
- self.redis.zrem('lacus:ongoing', uuid)
683
+ # Unrecoverable redis error, remove the capture settings
684
+ p = self.redis.pipeline()
685
+ p.delete(f'lacus:capture_settings:{uuid}')
686
+ p.zrem('lacus:ongoing', uuid)
687
+ p.execute()
659
688
  stats_pipeline.zincrby(f'stats:{today}:errors', 1, 'Redis Connection')
660
689
  logger.critical('Unable to connect to redis and to push the result of the capture.')
661
690
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "lacuscore"
3
- version = "1.14.1"
3
+ version = "1.14.3"
4
4
  description = "Core of Lacus, usable as a module"
5
5
  authors = [
6
6
  {name="Raphaël Vinot", email="raphael.vinot@circl.lu"}
@@ -15,7 +15,7 @@ dynamic = [ "classifiers" ]
15
15
 
16
16
  dependencies = [
17
17
  "requests (>=2.32.3)",
18
- "playwrightcapture[recaptcha] (>=1.29.2)",
18
+ "playwrightcapture[recaptcha] (>=1.29.3)",
19
19
  "defang (>=0.5.3)",
20
20
  "ua-parser[regex] (>=1.0.1)",
21
21
  "redis[hiredis] (>=5.3.0,<6.0.0)",
@@ -49,8 +49,8 @@ docs = ["Sphinx (>=8.2.3) ; python_version >= \"3.11\""]
49
49
  [tool.poetry.group.dev.dependencies]
50
50
  mypy = "^1.15.0"
51
51
  types-redis = {version = "^4.6.0.20241004"}
52
- types-requests = "^2.32.0.20250328"
53
- types-beautifulsoup4 = "^4.12.0.20250204"
52
+ types-requests = "^2.32.0.20250515"
53
+ types-beautifulsoup4 = "^4.12.0.20250516"
54
54
  pytest = "^8.3.5"
55
55
 
56
56
  [build-system]
File without changes
File without changes