lacuscore 1.10.3__tar.gz → 1.10.5__tar.gz

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lacuscore
3
- Version: 1.10.3
3
+ Version: 1.10.5
4
4
  Summary: Core of Lacus, usable as a module
5
5
  Home-page: https://github.com/ail-project/LacusCore
6
6
  License: BSD-3-Clause
@@ -29,7 +29,7 @@ Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
29
29
  Requires-Dist: defang (>=0.5.3,<0.6.0)
30
30
  Requires-Dist: dnspython (>=2.6.1,<3.0.0)
31
31
  Requires-Dist: eval-type-backport (>=0.2.0,<0.3.0) ; python_version < "3.10"
32
- Requires-Dist: playwrightcapture[recaptcha] (>=1.25.6,<2.0.0)
32
+ Requires-Dist: playwrightcapture[recaptcha] (>=1.25.7,<2.0.0)
33
33
  Requires-Dist: pydantic (>=2.8.2,<3.0.0)
34
34
  Requires-Dist: redis[hiredis] (>=5.0.7,<6.0.0)
35
35
  Requires-Dist: requests (>=2.32.3,<3.0.0)
@@ -6,7 +6,7 @@ import json
6
6
 
7
7
  from enum import IntEnum, unique
8
8
  from logging import LoggerAdapter
9
- from typing import MutableMapping, Any, TypedDict, Literal, Mapping
9
+ from typing import MutableMapping, Any, TypedDict, Mapping, Literal
10
10
 
11
11
  from defang import refang # type: ignore[import-untyped]
12
12
  from pydantic import BaseModel, field_validator, model_validator, ValidationError
@@ -111,6 +111,24 @@ class CaptureSettings(BaseModel):
111
111
  depth: int = 0
112
112
  rendered_hostname_only: bool = True # Note: only used if depth is > 0
113
113
 
114
+ @model_validator(mode="before")
115
+ @classmethod
116
+ def empty_str_to_none(cls, data: Any) -> dict[str, Any] | Any:
117
+ if isinstance(data, dict):
118
+ # Make sure all the strings are stripped, and None if empty.
119
+ to_return: dict[str, Any] = {}
120
+ for k, v in data.items():
121
+ if isinstance(v, str):
122
+ if v_stripped := v.strip():
123
+ if v_stripped[0] in ['{', '[']:
124
+ to_return[k] = from_json(v_stripped)
125
+ else:
126
+ to_return[k] = v_stripped
127
+ else:
128
+ to_return[k] = v
129
+ return to_return
130
+ return data
131
+
114
132
  @model_validator(mode='after')
115
133
  def check_capture_element(self) -> CaptureSettings:
116
134
  if self.document_name and not self.document:
@@ -126,142 +144,142 @@ class CaptureSettings(BaseModel):
126
144
 
127
145
  @field_validator('url', mode='after')
128
146
  @classmethod
129
- def load_url(cls, v: str | None) -> str | None:
130
- if isinstance(v, str):
131
- url = v.strip()
132
- url = refang(url) # In case we get a defanged url at this stage.
133
- if (not url.lower().startswith('data:')
134
- and not url.lower().startswith('http:')
135
- and not url.lower().startswith('https:')
136
- and not url.lower().startswith('file:')):
137
- url = f'http://{url}'
138
- return url
139
- return v
147
+ def load_url(cls, url: str | None) -> str | None:
148
+ if isinstance(url, str):
149
+ _url = refang(url) # In case we get a defanged url at this stage.
150
+ if (not _url.lower().startswith('data:')
151
+ and not _url.lower().startswith('http:')
152
+ and not _url.lower().startswith('https:')
153
+ and not _url.lower().startswith('file:')):
154
+ _url = f'http://{_url}'
155
+ return _url
156
+ return url
140
157
 
141
158
  @field_validator('document_name', mode='after')
142
159
  @classmethod
143
- def load_document_name(cls, v: str | None) -> str | None:
144
- if isinstance(v, str):
145
- name = v.strip()
146
- if '.' not in name:
160
+ def load_document_name(cls, document_name: str | None) -> str | None:
161
+ if isinstance(document_name, str):
162
+ if '.' not in document_name:
147
163
  # The browser will simply display the file as text if there is no extension.
148
164
  # Just add HTML as a fallback, as it will be the most common one.
149
- name = f'{name}.html'
150
- return name
151
- return v
165
+ document_name = f'{document_name}.html'
166
+ return document_name
167
+ return None
168
+
169
+ @field_validator('browser', mode='before')
170
+ @classmethod
171
+ def load_browser(cls, browser: Any) -> str | None:
172
+ if isinstance(browser, str) and browser in ['chromium', 'firefox', 'webkit']:
173
+ return browser
174
+ # There are old captures where the browser is not a playwright browser name, so we ignore it.
175
+ return None
152
176
 
153
177
  @field_validator('proxy', mode='before')
154
178
  @classmethod
155
- def load_proxy_json(cls, v: Any) -> str | dict[str, str] | None:
156
- if not v:
179
+ def load_proxy_json(cls, proxy: Any) -> str | dict[str, str] | None:
180
+ if not proxy:
157
181
  return None
158
- if isinstance(v, str):
159
- if v.startswith('{'):
160
- return from_json(v)
182
+ if isinstance(proxy, str):
161
183
  # Just the proxy
162
- return v
163
- elif isinstance(v, dict):
164
- return v
184
+ return proxy
185
+ elif isinstance(proxy, dict):
186
+ return proxy
165
187
  return None
166
188
 
167
189
  @field_validator('cookies', mode='before')
168
190
  @classmethod
169
- def load_cookies_json(cls, v: Any) -> list[dict[str, Any]] | None:
170
- if not v:
191
+ def load_cookies_json(cls, cookies: Any) -> list[dict[str, Any]] | None:
192
+ if not cookies:
171
193
  return None
172
- if isinstance(v, str):
173
- if v.startswith('['):
174
- return from_json(v)
194
+ if isinstance(cookies, str):
175
195
  # Cookies are invalid, ignoring.
176
- elif isinstance(v, list):
177
- return v
196
+ pass
197
+ elif isinstance(cookies, list):
198
+ return cookies
178
199
  return None
179
200
 
180
201
  @field_validator('headers', mode='before')
181
202
  @classmethod
182
- def load_headers_json(cls, v: Any) -> dict[str, str] | None:
183
- if not v:
203
+ def load_headers_json(cls, headers: Any) -> dict[str, str] | None:
204
+ if not headers:
184
205
  return None
185
- if isinstance(v, str):
186
- if v[0] == '{':
187
- return from_json(v)
188
- else:
189
- # make it a dict
190
- new_headers = {}
191
- for header_line in v.splitlines():
192
- if header_line and ':' in header_line:
193
- splitted = header_line.split(':', 1)
194
- if splitted and len(splitted) == 2:
195
- header, h_value = splitted
196
- if header.strip() and h_value.strip():
197
- new_headers[header.strip()] = h_value.strip()
198
- return new_headers
199
- elif isinstance(v, dict):
200
- return v
206
+ if isinstance(headers, str):
207
+ # make it a dict
208
+ new_headers = {}
209
+ for header_line in headers.splitlines():
210
+ if header_line and ':' in header_line:
211
+ splitted = header_line.split(':', 1)
212
+ if splitted and len(splitted) == 2:
213
+ header, h_value = splitted
214
+ if header.strip() and h_value.strip():
215
+ new_headers[header.strip()] = h_value.strip()
216
+ return new_headers
217
+ elif isinstance(headers, dict):
218
+ return headers
201
219
  return None
202
220
 
203
221
  @field_validator('http_credentials', mode='before')
204
222
  @classmethod
205
- def load_http_creds_json(cls, v: Any) -> dict[str, str] | None:
206
- if not v:
223
+ def load_http_creds_json(cls, http_credentials: Any) -> dict[str, str] | None:
224
+ if not http_credentials:
207
225
  return None
208
- if isinstance(v, str):
209
- if v.startswith('{'):
210
- return from_json(v)
211
- elif isinstance(v, dict):
212
- return v
226
+ if isinstance(http_credentials, str):
227
+ # ignore
228
+ return None
229
+ elif isinstance(http_credentials, dict):
230
+ return http_credentials
213
231
  return None
214
232
 
215
233
  @field_validator('http_credentials', mode='after')
216
234
  @classmethod
217
- def check_http_creds(cls, v: dict[str, str] | None) -> dict[str, str] | None:
218
- if not v:
219
- return v
220
- if 'username' in v and 'password' in v:
221
- return v
222
- raise CaptureSettingsError(f'HTTP credentials must have a username and a password: {v}')
235
+ def check_http_creds(cls, http_credentials: dict[str, str] | None) -> dict[str, str] | None:
236
+ if not http_credentials:
237
+ return None
238
+ if 'username' in http_credentials and 'password' in http_credentials:
239
+ return http_credentials
240
+ raise CaptureSettingsError(f'HTTP credentials must have a username and a password: {http_credentials}')
223
241
 
224
242
  @field_validator('geolocation', mode='before')
225
243
  @classmethod
226
- def load_geolocation_json(cls, v: Any) -> dict[str, float] | None:
227
- if not v:
244
+ def load_geolocation_json(cls, geolocation: Any) -> dict[str, float] | None:
245
+ if not geolocation:
246
+ return None
247
+ if isinstance(geolocation, str):
248
+ # ignore
228
249
  return None
229
- if isinstance(v, str):
230
- if v.startswith('{'):
231
- return from_json(v)
232
- elif isinstance(v, dict):
233
- return v
250
+ elif isinstance(geolocation, dict):
251
+ return geolocation
234
252
  return None
235
253
 
236
254
  @field_validator('geolocation', mode='after')
237
255
  @classmethod
238
- def check_geolocation(cls, v: dict[str, float] | None) -> dict[str, float] | None:
239
- if not v:
240
- return v
241
- if 'latitude' in v and 'longitude' in v:
242
- return v
243
- raise CaptureSettingsError(f'A geolocation must have a latitude and a longitude: {v}')
256
+ def check_geolocation(cls, geolocation: dict[str, float] | None) -> dict[str, float] | None:
257
+ if not geolocation:
258
+ return None
259
+ if 'latitude' in geolocation and 'longitude' in geolocation:
260
+ return geolocation
261
+ raise CaptureSettingsError(f'A geolocation must have a latitude and a longitude: {geolocation}')
244
262
 
245
263
  @field_validator('viewport', mode='before')
246
264
  @classmethod
247
- def load_viewport_json(cls, v: Any) -> dict[str, int] | None:
248
- if not v:
265
+ def load_viewport_json(cls, viewport: Any) -> dict[str, int] | None:
266
+ if not viewport:
267
+ return None
268
+ if isinstance(viewport, str):
269
+ # ignore
249
270
  return None
250
- if isinstance(v, str):
251
- if v.startswith('{'):
252
- return from_json(v)
253
- elif isinstance(v, dict):
254
- return v
271
+ elif isinstance(viewport, dict):
272
+ return viewport
255
273
  return None
256
274
 
257
275
  @field_validator('viewport', mode='after')
258
276
  @classmethod
259
- def check_viewport(cls, v: dict[str, int] | None) -> dict[str, int] | None:
260
- if not v:
261
- return v
262
- if 'width' in v and 'height' in v:
263
- return v
264
- raise CaptureSettingsError(f'A viewport must have a width and a height: {v}')
277
+ def check_viewport(cls, viewport: dict[str, int] | None) -> dict[str, int] | None:
278
+ if not viewport:
279
+ return None
280
+ if 'width' in viewport and 'height' in viewport:
281
+ return viewport
282
+ raise CaptureSettingsError(f'A viewport must have a width and a height: {viewport}')
265
283
 
266
284
  def redis_dump(self) -> Mapping[str | bytes, bytes | float | int | str]:
267
285
  mapping_capture: dict[str | bytes, bytes | float | int | str] = {}
@@ -6,7 +6,6 @@ from typing import Any, Coroutine, Optional, TypeVar, Tuple
6
6
 
7
7
  import asyncio
8
8
  import functools
9
- import logging
10
9
 
11
10
  from .helpers import LacusCoreLogAdapter
12
11
 
@@ -43,7 +42,7 @@ def create_task(
43
42
  def _handle_task_result(
44
43
  task: asyncio.Task[Any],
45
44
  *,
46
- logger: logging.Logger,
45
+ logger: 'LacusCoreLogAdapter',
47
46
  message: str,
48
47
  message_args: Tuple[Any, ...] = (),
49
48
  ) -> None:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "lacuscore"
3
- version = "1.10.3"
3
+ version = "1.10.5"
4
4
  description = "Core of Lacus, usable as a module"
5
5
  authors = ["Raphaël Vinot <raphael.vinot@circl.lu>"]
6
6
  license = "BSD-3-Clause"
@@ -34,7 +34,7 @@ Sphinx = [
34
34
  {version = "<7.2", python = "<3.9", optional = true},
35
35
  {version = "^7.2", python = ">=3.9", optional = true}
36
36
  ]
37
- playwrightcapture = {extras = ["recaptcha"], version = "^1.25.6"}
37
+ playwrightcapture = {extras = ["recaptcha"], version = "^1.25.7"}
38
38
  defang = "^0.5.3"
39
39
  ua-parser = "^0.18.0"
40
40
  redis = {version = "^5.0.7", extras = ["hiredis"]}
@@ -48,7 +48,7 @@ docs = ["Sphinx"]
48
48
 
49
49
  [tool.poetry.group.dev.dependencies]
50
50
  types-redis = {version = "^4.6.0.20240425"}
51
- mypy = "^1.10.1"
51
+ mypy = "^1.11.0"
52
52
  types-requests = "^2.32.0.20240712"
53
53
  types-beautifulsoup4 = "^4.12.0.20240511"
54
54
  ipython = [
@@ -56,7 +56,7 @@ ipython = [
56
56
  {version = "^8.18.0", python = ">=3.9"},
57
57
  {version = "^8.19.0", python = ">=3.10"}
58
58
  ]
59
- pytest = "^8.2.2"
59
+ pytest = "^8.3.1"
60
60
 
61
61
  [build-system]
62
62
  requires = ["poetry_core"]
File without changes
File without changes