lacuscore 1.7.10__tar.gz → 1.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lacuscore
3
- Version: 1.7.10
3
+ Version: 1.8.0
4
4
  Summary: Core of Lacus, usable as a module
5
5
  Home-page: https://github.com/ail-project/LacusCore
6
6
  License: BSD-3-Clause
@@ -26,7 +26,7 @@ Provides-Extra: docs
26
26
  Requires-Dist: Sphinx (<7.2) ; (python_version < "3.9") and (extra == "docs")
27
27
  Requires-Dist: Sphinx (>=7.2,<8.0) ; (python_version >= "3.9") and (extra == "docs")
28
28
  Requires-Dist: defang (>=0.5.3,<0.6.0)
29
- Requires-Dist: playwrightcapture[recaptcha] (>=1.22.7,<2.0.0)
29
+ Requires-Dist: playwrightcapture[recaptcha] (>=1.23.0,<2.0.0)
30
30
  Requires-Dist: redis[hiredis] (>=5.0.1,<6.0.0)
31
31
  Requires-Dist: requests (>=2.31.0,<3.0.0)
32
32
  Requires-Dist: ua-parser (>=0.18.0,<0.19.0)
@@ -27,13 +27,13 @@ from typing import Literal, Any, TypedDict, overload, cast, MutableMapping, Iter
27
27
  from uuid import uuid4
28
28
  from urllib.parse import urlsplit
29
29
 
30
- from defang import refang # type: ignore
30
+ from defang import refang # type: ignore[import-untyped]
31
31
  from playwrightcapture import Capture, PlaywrightCaptureException
32
32
  from playwrightcapture.capture import CaptureResponse as PlaywrightCaptureResponse
33
33
  from redis import Redis
34
34
  from redis.exceptions import ConnectionError as RedisConnectionError
35
35
  from redis.exceptions import DataError
36
- from ua_parser import user_agent_parser # type: ignore
36
+ from ua_parser import user_agent_parser # type: ignore[import-untyped]
37
37
 
38
38
  BROWSER = Literal['chromium', 'firefox', 'webkit']
39
39
 
@@ -81,7 +81,7 @@ class CaptureResponse(PlaywrightCaptureResponse, TypedDict, total=False):
81
81
  '''A capture made by Lacus. With the base64 encoded image and downloaded file decoded to bytes.'''
82
82
 
83
83
  # Need to make sure the type is what's expected down the line
84
- children: list[CaptureResponse] | None # type: ignore
84
+ children: list[CaptureResponse] | None # type: ignore[misc]
85
85
 
86
86
  status: int
87
87
  runtime: float | None
@@ -125,9 +125,9 @@ class CaptureSettings(TypedDict, total=False):
125
125
  viewport: dict[str, int] | None
126
126
  referer: str | None
127
127
  with_favicon: bool
128
- force: bool | None
129
- recapture_interval: int | None
130
- priority: int | None
128
+ force: bool
129
+ recapture_interval: int
130
+ priority: int
131
131
  uuid: str | None
132
132
 
133
133
  depth: int
@@ -268,11 +268,11 @@ class LacusCore():
268
268
  if 'url' in settings and settings['url'] is not None:
269
269
  settings['url'] = settings['url'].strip()
270
270
  if settings.get('force') is not None:
271
- force = settings.pop('force') # type: ignore
271
+ force = settings.pop('force', False)
272
272
  if settings.get('recapture_interval') is not None:
273
- recapture_interval = settings.pop('recapture_interval') # type: ignore
273
+ recapture_interval = settings.pop('recapture_interval', 300)
274
274
  if settings.get('priority') is not None:
275
- priority = settings.pop('priority') # type: ignore
275
+ priority = settings.pop('priority', 0)
276
276
  to_enqueue = settings
277
277
  else:
278
278
  to_enqueue = {'depth': depth, 'rendered_hostname_only': rendered_hostname_only}
@@ -331,17 +331,19 @@ class LacusCore():
331
331
 
332
332
  mapping_capture: dict[str, bytes | float | int | str] = {}
333
333
  for key, value in to_enqueue.items():
334
+ if value is None:
335
+ continue
334
336
  if isinstance(value, bool):
335
337
  mapping_capture[key] = 1 if value else 0
336
338
  elif isinstance(value, (list, dict)):
337
339
  if value:
338
340
  mapping_capture[key] = json.dumps(value)
339
- elif value is not None and value != '': # we're ok with 0 for example
340
- mapping_capture[key] = value # type: ignore
341
+ elif isinstance(value, (bytes, float, int, str)) and value not in ['', b'']: # we're ok with 0 for example
342
+ mapping_capture[key] = value
341
343
 
342
344
  p = self.redis.pipeline()
343
345
  p.set(f'lacus:query_hash:{hash_query}', perma_uuid, nx=True, ex=recapture_interval)
344
- p.hset(f'lacus:capture_settings:{perma_uuid}', mapping=mapping_capture) # type: ignore
346
+ p.hset(f'lacus:capture_settings:{perma_uuid}', mapping=mapping_capture) # type: ignore[arg-type]
345
347
  p.zadd('lacus:to_capture', {perma_uuid: priority if priority is not None else 0})
346
348
  try:
347
349
  p.execute()
@@ -405,6 +407,10 @@ class LacusCore():
405
407
  return CaptureStatus.QUEUED
406
408
  elif self.redis.zscore('lacus:ongoing', uuid) is not None:
407
409
  return CaptureStatus.ONGOING
410
+ elif self.redis.exists(f'lacus:capture_settings:{uuid}'):
411
+ # we might have popped the UUID out of lacus:to_capture
412
+ # but not pushed it in lacus:ongoing yet
413
+ return CaptureStatus.QUEUED
408
414
  elif self.redis.exists(f'lacus:capture_results_hash:{uuid}'):
409
415
  return CaptureStatus.DONE
410
416
  elif self.redis.exists(f'lacus:capture_results:{uuid}'):
@@ -437,7 +443,7 @@ class LacusCore():
437
443
  :param priority: Only for internal use, will decide on the priority of the capture if the try now fails.
438
444
  """
439
445
  if self.redis.zscore('lacus:ongoing', uuid) is not None:
440
- # the capture is ongoing
446
+ # the capture is already ongoing
441
447
  await asyncio.sleep(1)
442
448
  return
443
449
 
@@ -464,22 +470,22 @@ class LacusCore():
464
470
  if k in ['url', 'document_name', 'browser', 'device_name', 'user_agent',
465
471
  'referer', 'timezone_id', 'locale', 'color_scheme']:
466
472
  # string
467
- to_capture[k] = v.decode() # type: ignore
473
+ to_capture[k] = v.decode() # type: ignore[literal-required]
468
474
  elif k in ['cookies', 'http_credentials', 'viewport', 'geolocation']:
469
475
  # dicts or list
470
- to_capture[k] = json.loads(v) # type: ignore
476
+ to_capture[k] = json.loads(v) # type: ignore[literal-required]
471
477
  elif k in ['proxy', 'headers']:
472
478
  # can be dict or str
473
479
  try:
474
- to_capture[k] = json.loads(v) # type: ignore
480
+ to_capture[k] = json.loads(v) # type: ignore[literal-required]
475
481
  except Exception:
476
- to_capture[k] = v.decode() # type: ignore
482
+ to_capture[k] = v.decode() # type: ignore[literal-required]
477
483
  elif k in ['general_timeout_in_sec', 'depth']:
478
484
  # int
479
- to_capture[k] = int(v) # type: ignore
485
+ to_capture[k] = int(v) # type: ignore[literal-required]
480
486
  elif k in ['rendered_hostname_only', 'with_favicon']:
481
487
  # bool
482
- to_capture[k] = bool(int(v)) # type: ignore
488
+ to_capture[k] = bool(int(v)) # type: ignore[literal-required]
483
489
  elif k == 'document':
484
490
  document_as_bytes = b64decode(v)
485
491
  else:
@@ -577,15 +583,15 @@ class LacusCore():
577
583
  proxy=proxy,
578
584
  general_timeout_in_sec=general_timeout) as capture:
579
585
  # required by Mypy: https://github.com/python/mypy/issues/3004
580
- capture.headers = to_capture.get('headers') # type: ignore
581
- capture.cookies = to_capture.get('cookies') # type: ignore
582
- capture.viewport = to_capture.get('viewport') # type: ignore
583
- capture.user_agent = to_capture.get('user_agent') # type: ignore
584
- capture.http_credentials = to_capture.get('http_credentials') # type: ignore
585
- capture.geolocation = to_capture.get('geolocation') # type: ignore
586
- capture.timezone_id = to_capture.get('timezone_id') # type: ignore
587
- capture.locale = to_capture.get('locale') # type: ignore
588
- capture.color_scheme = to_capture.get('color_scheme') # type: ignore
586
+ capture.headers = to_capture.get('headers') # type: ignore[assignment]
587
+ capture.cookies = to_capture.get('cookies') # type: ignore[assignment]
588
+ capture.viewport = to_capture.get('viewport') # type: ignore[assignment]
589
+ capture.user_agent = to_capture.get('user_agent') # type: ignore[assignment]
590
+ capture.http_credentials = to_capture.get('http_credentials') # type: ignore[assignment]
591
+ capture.geolocation = to_capture.get('geolocation') # type: ignore[assignment]
592
+ capture.timezone_id = to_capture.get('timezone_id') # type: ignore[assignment]
593
+ capture.locale = to_capture.get('locale') # type: ignore[assignment]
594
+ capture.color_scheme = to_capture.get('color_scheme') # type: ignore[assignment]
589
595
  try:
590
596
  await asyncio.wait_for(capture.initialize_context(), timeout=general_timeout)
591
597
  except (TimeoutError, asyncio.exceptions.TimeoutError):
@@ -748,10 +754,10 @@ class LacusCore():
748
754
  if key in ['har', 'cookies', 'potential_favicons', 'html', 'children'] or not results.get(key):
749
755
  continue
750
756
  # these entries can be stored directly
751
- hash_to_set[key] = results[key] # type: ignore
757
+ hash_to_set[key] = results[key] # type: ignore[literal-required]
752
758
 
753
759
  if hash_to_set:
754
- pipeline.hset(root_key, mapping=hash_to_set) # type: ignore
760
+ pipeline.hset(root_key, mapping=hash_to_set) # type: ignore[arg-type]
755
761
  # Make sure the key expires
756
762
  pipeline.expire(root_key, 36000)
757
763
  else:
@@ -782,19 +788,19 @@ class LacusCore():
782
788
  to_return['children'] = []
783
789
  for child_root_key in sorted(pickle.loads(value)):
784
790
  if child := self._get_capture_response(capture_uuid, child_root_key):
785
- to_return['children'].append(child) # type: ignore
791
+ to_return['children'].append(child) # type: ignore[union-attr]
786
792
  elif key in [b'status']:
787
793
  # The value in an int
788
- to_return[key.decode()] = int(value) # type: ignore
794
+ to_return[key.decode()] = int(value) # type: ignore[literal-required]
789
795
  elif key in [b'runtime']:
790
796
  # The value is a float
791
- to_return[key.decode()] = float(value) # type: ignore
797
+ to_return[key.decode()] = float(value) # type: ignore[literal-required]
792
798
  elif key in [b'last_redirected_url', b'error', b'error_name', b'html', b'downloaded_filename']:
793
799
  # the value is a string
794
- to_return[key.decode()] = value.decode() # type: ignore
800
+ to_return[key.decode()] = value.decode() # type: ignore[literal-required]
795
801
  elif key in [b'png', b'downloaded_file']:
796
802
  # the value is bytes
797
- to_return[key.decode()] = value # type: ignore
803
+ to_return[key.decode()] = value # type: ignore[literal-required]
798
804
  else:
799
805
  logger.critical(f'Unexpected key in response: {key} - {value}')
800
806
  return to_return
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "lacuscore"
3
- version = "1.7.10"
3
+ version = "1.8.0"
4
4
  description = "Core of Lacus, usable as a module"
5
5
  authors = ["Raphaël Vinot <raphael.vinot@circl.lu>"]
6
6
  license = "BSD-3-Clause"
@@ -22,6 +22,7 @@ classifiers = [
22
22
  'Programming Language :: Python :: 3.9',
23
23
  'Programming Language :: Python :: 3.10',
24
24
  'Programming Language :: Python :: 3.11',
25
+ 'Programming Language :: Python :: 3.12',
25
26
  'Topic :: Security',
26
27
  'Topic :: Internet',
27
28
  ]
@@ -33,7 +34,7 @@ Sphinx = [
33
34
  {version = "<7.2", python = "<3.9", optional = true},
34
35
  {version = "^7.2", python = ">=3.9", optional = true}
35
36
  ]
36
- playwrightcapture = {extras = ["recaptcha"], version = "^1.22.7"}
37
+ playwrightcapture = {extras = ["recaptcha"], version = "^1.23.0"}
37
38
  defang = "^0.5.3"
38
39
  ua-parser = "^0.18.0"
39
40
  redis = {version = "^5.0.1", extras = ["hiredis"]}
@@ -44,14 +45,14 @@ docs = ["Sphinx"]
44
45
  [tool.poetry.group.dev.dependencies]
45
46
  types-redis = {version = "^4.6.0.20240106"}
46
47
  mypy = "^1.8.0"
47
- types-requests = "^2.31.0.20240106"
48
+ types-requests = "^2.31.0.20240125"
48
49
  types-beautifulsoup4 = "^4.12.0.20240106"
49
50
  ipython = [
50
51
  {version = "<8.13.0", python = "<3.9"},
51
52
  {version = "^8.18.0", python = ">=3.9"},
52
53
  {version = "^8.19.0", python = ">=3.10"}
53
54
  ]
54
- pytest = "^7.4.4"
55
+ pytest = "^8.0.0"
55
56
 
56
57
  [build-system]
57
58
  requires = ["poetry_core"]
File without changes
File without changes
File without changes