lacuscore 1.9.3__tar.gz → 1.9.4__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lacuscore
3
- Version: 1.9.3
3
+ Version: 1.9.4
4
4
  Summary: Core of Lacus, usable as a module
5
5
  Home-page: https://github.com/ail-project/LacusCore
6
6
  License: BSD-3-Clause
@@ -28,9 +28,9 @@ Requires-Dist: Sphinx (>=7.2,<8.0) ; (python_version >= "3.9") and (extra == "do
28
28
  Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
29
29
  Requires-Dist: defang (>=0.5.3,<0.6.0)
30
30
  Requires-Dist: dnspython (>=2.6.1,<3.0.0)
31
- Requires-Dist: playwrightcapture[recaptcha] (>=1.24.6,<2.0.0)
31
+ Requires-Dist: playwrightcapture[recaptcha] (>=1.24.10,<2.0.0)
32
32
  Requires-Dist: redis[hiredis] (>=5.0.4,<6.0.0)
33
- Requires-Dist: requests (>=2.31.0,<3.0.0)
33
+ Requires-Dist: requests (>=2.32.1,<3.0.0)
34
34
  Requires-Dist: ua-parser (>=0.18.0,<0.19.0)
35
35
  Project-URL: Documentation, https://lacuscore.readthedocs.io/en/latest/
36
36
  Project-URL: Repository, https://github.com/ail-project/LacusCore
@@ -43,9 +43,19 @@ from .helpers import (
43
43
 
44
44
  if sys.version_info < (3, 11):
45
45
  from async_timeout import timeout
46
+
47
+ def timeout_expired(timeout_cm, logger, error_message: str) -> None: # type: ignore[no-untyped-def]
48
+ if timeout_cm.expired:
49
+ logger.warning(f'Timeout expired: {error_message}')
50
+
46
51
  else:
47
52
  from asyncio import timeout
48
53
 
54
+ def timeout_expired(timeout_cm, logger, error_message: str) -> None: # type: ignore[no-untyped-def]
55
+ if timeout_cm.expired():
56
+ logger.warning(f'Timeout expired: {error_message}')
57
+
58
+
49
59
  BROWSER = Literal['chromium', 'firefox', 'webkit']
50
60
 
51
61
 
@@ -523,8 +533,6 @@ class LacusCore():
523
533
  browser_engine = 'webkit'
524
534
  try:
525
535
  logger.debug(f'Capturing {url}')
526
- # NOTE: starting with python 3.11, we can use asyncio.timeout
527
- # async with asyncio.timeout(self.max_capture_time):
528
536
  general_timeout = to_capture.get('general_timeout_in_sec')
529
537
  stats_pipeline.sadd(f'stats:{today}:captures', url)
530
538
  async with Capture(
@@ -545,12 +553,19 @@ class LacusCore():
545
553
  capture.locale = to_capture.get('locale') # type: ignore[assignment]
546
554
  capture.color_scheme = to_capture.get('color_scheme') # type: ignore[assignment]
547
555
  try:
548
- async with timeout(general_timeout):
556
+ # make sure the initialization doesn't take too long
557
+ if general_timeout is None:
558
+ general_timeout = 5
559
+ init_timeout = max(general_timeout / 2, 5)
560
+ async with timeout(init_timeout) as initialize_timeout:
549
561
  await capture.initialize_context()
562
+
550
563
  except (TimeoutError, asyncio.exceptions.TimeoutError):
564
+ timeout_expired(initialize_timeout, logger, 'Initializing took too long.')
551
565
  logger.warning(f'Initializing the context for {url} took longer than the allowed general timeout ({general_timeout}s)')
552
566
  raise RetryCapture(f'Initializing the context for {url} took longer than the allowed general timeout ({general_timeout}s)')
553
- async with timeout(self.max_capture_time):
567
+
568
+ async with timeout(self.max_capture_time) as capture_timeout:
554
569
  playwright_result = await capture.capture_page(
555
570
  url, referer=to_capture.get('referer'),
556
571
  depth=to_capture.get('depth', 0),
@@ -576,6 +591,7 @@ class LacusCore():
576
591
  # We can give it another shot.
577
592
  raise RetryCapture(f'The capture of {url} has been cancelled.')
578
593
  except (TimeoutError, asyncio.exceptions.TimeoutError):
594
+ timeout_expired(capture_timeout, logger, 'Capture took too long.')
579
595
  logger.warning(f'The capture of {url} took longer than the allowed max capture time ({self.max_capture_time}s)')
580
596
  result = {'error': f'The capture of {url} took longer than the allowed max capture time ({self.max_capture_time}s)'}
581
597
  raise CaptureError(f'The capture of {url} took longer than the allowed max capture time ({self.max_capture_time}s)')
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "lacuscore"
3
- version = "1.9.3"
3
+ version = "1.9.4"
4
4
  description = "Core of Lacus, usable as a module"
5
5
  authors = ["Raphaël Vinot <raphael.vinot@circl.lu>"]
6
6
  license = "BSD-3-Clause"
@@ -29,12 +29,12 @@ classifiers = [
29
29
 
30
30
  [tool.poetry.dependencies]
31
31
  python = "^3.8"
32
- requests = "^2.31.0"
32
+ requests = "^2.32.1"
33
33
  Sphinx = [
34
34
  {version = "<7.2", python = "<3.9", optional = true},
35
35
  {version = "^7.2", python = ">=3.9", optional = true}
36
36
  ]
37
- playwrightcapture = {extras = ["recaptcha"], version = "^1.24.6"}
37
+ playwrightcapture = {extras = ["recaptcha"], version = "^1.24.10"}
38
38
  defang = "^0.5.3"
39
39
  ua-parser = "^0.18.0"
40
40
  redis = {version = "^5.0.4", extras = ["hiredis"]}
@@ -47,14 +47,14 @@ docs = ["Sphinx"]
47
47
  [tool.poetry.group.dev.dependencies]
48
48
  types-redis = {version = "^4.6.0.20240425"}
49
49
  mypy = "^1.10.0"
50
- types-requests = "^2.31.0.20240406"
51
- types-beautifulsoup4 = "^4.12.0.20240229"
50
+ types-requests = "^2.32.0.20240521"
51
+ types-beautifulsoup4 = "^4.12.0.20240511"
52
52
  ipython = [
53
53
  {version = "<8.13.0", python = "<3.9"},
54
54
  {version = "^8.18.0", python = ">=3.9"},
55
55
  {version = "^8.19.0", python = ">=3.10"}
56
56
  ]
57
- pytest = "^8.2.0"
57
+ pytest = "^8.2.1"
58
58
 
59
59
  [build-system]
60
60
  requires = ["poetry_core"]
File without changes
File without changes
File without changes