lacuscore 1.10.4__py3-none-any.whl → 1.10.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lacuscore/helpers.py +110 -92
- lacuscore/lacuscore.py +2 -2
- lacuscore/task_logger.py +1 -2
- {lacuscore-1.10.4.dist-info → lacuscore-1.10.5.dist-info}/METADATA +2 -2
- lacuscore-1.10.5.dist-info/RECORD +10 -0
- lacuscore-1.10.4.dist-info/RECORD +0 -10
- {lacuscore-1.10.4.dist-info → lacuscore-1.10.5.dist-info}/LICENSE +0 -0
- {lacuscore-1.10.4.dist-info → lacuscore-1.10.5.dist-info}/WHEEL +0 -0
lacuscore/helpers.py
CHANGED
@@ -6,7 +6,7 @@ import json
|
|
6
6
|
|
7
7
|
from enum import IntEnum, unique
|
8
8
|
from logging import LoggerAdapter
|
9
|
-
from typing import MutableMapping, Any, TypedDict, Mapping
|
9
|
+
from typing import MutableMapping, Any, TypedDict, Mapping, Literal
|
10
10
|
|
11
11
|
from defang import refang # type: ignore[import-untyped]
|
12
12
|
from pydantic import BaseModel, field_validator, model_validator, ValidationError
|
@@ -87,7 +87,7 @@ class CaptureSettings(BaseModel):
|
|
87
87
|
url: str | None = None
|
88
88
|
document_name: str | None = None
|
89
89
|
document: str | None = None
|
90
|
-
browser:
|
90
|
+
browser: Literal['chromium', 'firefox', 'webkit'] | None = None
|
91
91
|
device_name: str | None = None
|
92
92
|
user_agent: str | None = None
|
93
93
|
proxy: str | dict[str, str] | None = None
|
@@ -111,6 +111,24 @@ class CaptureSettings(BaseModel):
|
|
111
111
|
depth: int = 0
|
112
112
|
rendered_hostname_only: bool = True # Note: only used if depth is > 0
|
113
113
|
|
114
|
+
@model_validator(mode="before")
|
115
|
+
@classmethod
|
116
|
+
def empty_str_to_none(cls, data: Any) -> dict[str, Any] | Any:
|
117
|
+
if isinstance(data, dict):
|
118
|
+
# Make sure all the strings are stripped, and None if empty.
|
119
|
+
to_return: dict[str, Any] = {}
|
120
|
+
for k, v in data.items():
|
121
|
+
if isinstance(v, str):
|
122
|
+
if v_stripped := v.strip():
|
123
|
+
if v_stripped[0] in ['{', '[']:
|
124
|
+
to_return[k] = from_json(v_stripped)
|
125
|
+
else:
|
126
|
+
to_return[k] = v_stripped
|
127
|
+
else:
|
128
|
+
to_return[k] = v
|
129
|
+
return to_return
|
130
|
+
return data
|
131
|
+
|
114
132
|
@model_validator(mode='after')
|
115
133
|
def check_capture_element(self) -> CaptureSettings:
|
116
134
|
if self.document_name and not self.document:
|
@@ -126,142 +144,142 @@ class CaptureSettings(BaseModel):
|
|
126
144
|
|
127
145
|
@field_validator('url', mode='after')
|
128
146
|
@classmethod
|
129
|
-
def load_url(cls,
|
130
|
-
if isinstance(
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
and not
|
135
|
-
and not
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
return v
|
147
|
+
def load_url(cls, url: str | None) -> str | None:
|
148
|
+
if isinstance(url, str):
|
149
|
+
_url = refang(url) # In case we get a defanged url at this stage.
|
150
|
+
if (not _url.lower().startswith('data:')
|
151
|
+
and not _url.lower().startswith('http:')
|
152
|
+
and not _url.lower().startswith('https:')
|
153
|
+
and not _url.lower().startswith('file:')):
|
154
|
+
_url = f'http://{_url}'
|
155
|
+
return _url
|
156
|
+
return url
|
140
157
|
|
141
158
|
@field_validator('document_name', mode='after')
|
142
159
|
@classmethod
|
143
|
-
def load_document_name(cls,
|
144
|
-
if isinstance(
|
145
|
-
|
146
|
-
if '.' not in name:
|
160
|
+
def load_document_name(cls, document_name: str | None) -> str | None:
|
161
|
+
if isinstance(document_name, str):
|
162
|
+
if '.' not in document_name:
|
147
163
|
# The browser will simply display the file as text if there is no extension.
|
148
164
|
# Just add HTML as a fallback, as it will be the most comon one.
|
149
|
-
|
150
|
-
return
|
151
|
-
return
|
165
|
+
document_name = f'{document_name}.html'
|
166
|
+
return document_name
|
167
|
+
return None
|
168
|
+
|
169
|
+
@field_validator('browser', mode='before')
|
170
|
+
@classmethod
|
171
|
+
def load_browser(cls, browser: Any) -> str | None:
|
172
|
+
if isinstance(browser, str) and browser in ['chromium', 'firefox', 'webkit']:
|
173
|
+
return browser
|
174
|
+
# There are old captures where the browser is not a playwright browser name, so we ignore it.
|
175
|
+
return None
|
152
176
|
|
153
177
|
@field_validator('proxy', mode='before')
|
154
178
|
@classmethod
|
155
|
-
def load_proxy_json(cls,
|
156
|
-
if not
|
179
|
+
def load_proxy_json(cls, proxy: Any) -> str | dict[str, str] | None:
|
180
|
+
if not proxy:
|
157
181
|
return None
|
158
|
-
if isinstance(
|
159
|
-
if v.startswith('{'):
|
160
|
-
return from_json(v)
|
182
|
+
if isinstance(proxy, str):
|
161
183
|
# Just the proxy
|
162
|
-
return
|
163
|
-
elif isinstance(
|
164
|
-
return
|
184
|
+
return proxy
|
185
|
+
elif isinstance(proxy, dict):
|
186
|
+
return proxy
|
165
187
|
return None
|
166
188
|
|
167
189
|
@field_validator('cookies', mode='before')
|
168
190
|
@classmethod
|
169
|
-
def load_cookies_json(cls,
|
170
|
-
if not
|
191
|
+
def load_cookies_json(cls, cookies: Any) -> list[dict[str, Any]] | None:
|
192
|
+
if not cookies:
|
171
193
|
return None
|
172
|
-
if isinstance(
|
173
|
-
if v.startswith('['):
|
174
|
-
return from_json(v)
|
194
|
+
if isinstance(cookies, str):
|
175
195
|
# Cookies are invalid, ignoring.
|
176
|
-
|
177
|
-
|
196
|
+
pass
|
197
|
+
elif isinstance(cookies, list):
|
198
|
+
return cookies
|
178
199
|
return None
|
179
200
|
|
180
201
|
@field_validator('headers', mode='before')
|
181
202
|
@classmethod
|
182
|
-
def load_headers_json(cls,
|
183
|
-
if not
|
203
|
+
def load_headers_json(cls, headers: Any) -> dict[str, str] | None:
|
204
|
+
if not headers:
|
184
205
|
return None
|
185
|
-
if isinstance(
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
return new_headers
|
199
|
-
elif isinstance(v, dict):
|
200
|
-
return v
|
206
|
+
if isinstance(headers, str):
|
207
|
+
# make it a dict
|
208
|
+
new_headers = {}
|
209
|
+
for header_line in headers.splitlines():
|
210
|
+
if header_line and ':' in header_line:
|
211
|
+
splitted = header_line.split(':', 1)
|
212
|
+
if splitted and len(splitted) == 2:
|
213
|
+
header, h_value = splitted
|
214
|
+
if header.strip() and h_value.strip():
|
215
|
+
new_headers[header.strip()] = h_value.strip()
|
216
|
+
return new_headers
|
217
|
+
elif isinstance(headers, dict):
|
218
|
+
return headers
|
201
219
|
return None
|
202
220
|
|
203
221
|
@field_validator('http_credentials', mode='before')
|
204
222
|
@classmethod
|
205
|
-
def load_http_creds_json(cls,
|
206
|
-
if not
|
223
|
+
def load_http_creds_json(cls, http_credentials: Any) -> dict[str, str] | None:
|
224
|
+
if not http_credentials:
|
207
225
|
return None
|
208
|
-
if isinstance(
|
209
|
-
|
210
|
-
|
211
|
-
elif isinstance(
|
212
|
-
return
|
226
|
+
if isinstance(http_credentials, str):
|
227
|
+
# ignore
|
228
|
+
return None
|
229
|
+
elif isinstance(http_credentials, dict):
|
230
|
+
return http_credentials
|
213
231
|
return None
|
214
232
|
|
215
233
|
@field_validator('http_credentials', mode='after')
|
216
234
|
@classmethod
|
217
|
-
def check_http_creds(cls,
|
218
|
-
if not
|
219
|
-
return
|
220
|
-
if 'username' in
|
221
|
-
return
|
222
|
-
raise CaptureSettingsError(f'HTTP credentials must have a username and a password: {
|
235
|
+
def check_http_creds(cls, http_credentials: dict[str, str] | None) -> dict[str, str] | None:
|
236
|
+
if not http_credentials:
|
237
|
+
return None
|
238
|
+
if 'username' in http_credentials and 'password' in http_credentials:
|
239
|
+
return http_credentials
|
240
|
+
raise CaptureSettingsError(f'HTTP credentials must have a username and a password: {http_credentials}')
|
223
241
|
|
224
242
|
@field_validator('geolocation', mode='before')
|
225
243
|
@classmethod
|
226
|
-
def load_geolocation_json(cls,
|
227
|
-
if not
|
244
|
+
def load_geolocation_json(cls, geolocation: Any) -> dict[str, float] | None:
|
245
|
+
if not geolocation:
|
246
|
+
return None
|
247
|
+
if isinstance(geolocation, str):
|
248
|
+
# ignore
|
228
249
|
return None
|
229
|
-
|
230
|
-
|
231
|
-
return from_json(v)
|
232
|
-
elif isinstance(v, dict):
|
233
|
-
return v
|
250
|
+
elif isinstance(geolocation, dict):
|
251
|
+
return geolocation
|
234
252
|
return None
|
235
253
|
|
236
254
|
@field_validator('geolocation', mode='after')
|
237
255
|
@classmethod
|
238
|
-
def check_geolocation(cls,
|
239
|
-
if not
|
240
|
-
return
|
241
|
-
if 'latitude' in
|
242
|
-
return
|
243
|
-
raise CaptureSettingsError(f'A geolocation must have a latitude and a longitude: {
|
256
|
+
def check_geolocation(cls, geolocation: dict[str, float] | None) -> dict[str, float] | None:
|
257
|
+
if not geolocation:
|
258
|
+
return None
|
259
|
+
if 'latitude' in geolocation and 'longitude' in geolocation:
|
260
|
+
return geolocation
|
261
|
+
raise CaptureSettingsError(f'A geolocation must have a latitude and a longitude: {geolocation}')
|
244
262
|
|
245
263
|
@field_validator('viewport', mode='before')
|
246
264
|
@classmethod
|
247
|
-
def load_viewport_json(cls,
|
248
|
-
if not
|
265
|
+
def load_viewport_json(cls, viewport: Any) -> dict[str, int] | None:
|
266
|
+
if not viewport:
|
267
|
+
return None
|
268
|
+
if isinstance(viewport, str):
|
269
|
+
# ignore
|
249
270
|
return None
|
250
|
-
|
251
|
-
|
252
|
-
return from_json(v)
|
253
|
-
elif isinstance(v, dict):
|
254
|
-
return v
|
271
|
+
elif isinstance(viewport, dict):
|
272
|
+
return viewport
|
255
273
|
return None
|
256
274
|
|
257
275
|
@field_validator('viewport', mode='after')
|
258
276
|
@classmethod
|
259
|
-
def check_viewport(cls,
|
260
|
-
if not
|
261
|
-
return
|
262
|
-
if 'width' in
|
263
|
-
return
|
264
|
-
raise CaptureSettingsError(f'A viewport must have a width and a height: {
|
277
|
+
def check_viewport(cls, viewport: dict[str, int] | None) -> dict[str, int] | None:
|
278
|
+
if not viewport:
|
279
|
+
return None
|
280
|
+
if 'width' in viewport and 'height' in viewport:
|
281
|
+
return viewport
|
282
|
+
raise CaptureSettingsError(f'A viewport must have a width and a height: {viewport}')
|
265
283
|
|
266
284
|
def redis_dump(self) -> Mapping[str | bytes, bytes | float | int | str]:
|
267
285
|
mapping_capture: dict[str | bytes, bytes | float | int | str] = {}
|
lacuscore/lacuscore.py
CHANGED
@@ -433,8 +433,8 @@ class LacusCore():
|
|
433
433
|
|
434
434
|
# Set default as chromium
|
435
435
|
browser_engine: BROWSER = "chromium"
|
436
|
-
if to_capture.browser
|
437
|
-
browser_engine = to_capture.browser
|
436
|
+
if to_capture.browser:
|
437
|
+
browser_engine = to_capture.browser
|
438
438
|
elif to_capture.user_agent:
|
439
439
|
parsed_string = user_agent_parser.ParseUserAgent(to_capture.user_agent)
|
440
440
|
browser_family = parsed_string['family'].lower()
|
lacuscore/task_logger.py
CHANGED
@@ -6,7 +6,6 @@ from typing import Any, Coroutine, Optional, TypeVar, Tuple
|
|
6
6
|
|
7
7
|
import asyncio
|
8
8
|
import functools
|
9
|
-
import logging
|
10
9
|
|
11
10
|
from .helpers import LacusCoreLogAdapter
|
12
11
|
|
@@ -43,7 +42,7 @@ def create_task(
|
|
43
42
|
def _handle_task_result(
|
44
43
|
task: asyncio.Task[Any],
|
45
44
|
*,
|
46
|
-
logger:
|
45
|
+
logger: 'LacusCoreLogAdapter',
|
47
46
|
message: str,
|
48
47
|
message_args: Tuple[Any, ...] = (),
|
49
48
|
) -> None:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lacuscore
|
3
|
-
Version: 1.10.4
|
3
|
+
Version: 1.10.5
|
4
4
|
Summary: Core of Lacus, usable as a module
|
5
5
|
Home-page: https://github.com/ail-project/LacusCore
|
6
6
|
License: BSD-3-Clause
|
@@ -29,7 +29,7 @@ Requires-Dist: async-timeout (>=4.0.3,<5.0.0) ; python_version < "3.11"
|
|
29
29
|
Requires-Dist: defang (>=0.5.3,<0.6.0)
|
30
30
|
Requires-Dist: dnspython (>=2.6.1,<3.0.0)
|
31
31
|
Requires-Dist: eval-type-backport (>=0.2.0,<0.3.0) ; python_version < "3.10"
|
32
|
-
Requires-Dist: playwrightcapture[recaptcha] (>=1.25.
|
32
|
+
Requires-Dist: playwrightcapture[recaptcha] (>=1.25.7,<2.0.0)
|
33
33
|
Requires-Dist: pydantic (>=2.8.2,<3.0.0)
|
34
34
|
Requires-Dist: redis[hiredis] (>=5.0.7,<6.0.0)
|
35
35
|
Requires-Dist: requests (>=2.32.3,<3.0.0)
|
@@ -0,0 +1,10 @@
|
|
1
|
+
lacuscore/__init__.py,sha256=aLBshQPT9IBDKn5qWrX9A_exqtLFPyLsQiPWdfpAFjA,537
|
2
|
+
lacuscore/helpers.py,sha256=cGapH4apNJIkKCF0vzumKCJ_ngAsqoq4IjA9iZWbkfU,10856
|
3
|
+
lacuscore/lacus_monitoring.py,sha256=UOfE_1-_rhVeKJXQ_m9XxYkr7VwyQnA6iK-x_tcXJfo,2775
|
4
|
+
lacuscore/lacuscore.py,sha256=eQMJBf0ZYkCkACu3OWCdidpNEYn0wZ1piIVHP1kv_kU,39372
|
5
|
+
lacuscore/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
+
lacuscore/task_logger.py,sha256=kWBThqfv_alu1YA3jEqP4GsqXIVKLbzyI7w14aJ2g9I,1908
|
7
|
+
lacuscore-1.10.5.dist-info/LICENSE,sha256=4C4hLYrIkUD96Ggk-y_Go1Qf7PBZrEm9PSeTGe2nd4s,1516
|
8
|
+
lacuscore-1.10.5.dist-info/METADATA,sha256=5SVAbyOXH-4Qp-0KU2pbjWppMHHrGWhtGIrM31VQ0hk,2747
|
9
|
+
lacuscore-1.10.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
10
|
+
lacuscore-1.10.5.dist-info/RECORD,,
|
@@ -1,10 +0,0 @@
|
|
1
|
-
lacuscore/__init__.py,sha256=aLBshQPT9IBDKn5qWrX9A_exqtLFPyLsQiPWdfpAFjA,537
|
2
|
-
lacuscore/helpers.py,sha256=GAn47fHikhVciuKtFK0-EbxHbBUNiImtUH6EJ_Vdgu4,9594
|
3
|
-
lacuscore/lacus_monitoring.py,sha256=UOfE_1-_rhVeKJXQ_m9XxYkr7VwyQnA6iK-x_tcXJfo,2775
|
4
|
-
lacuscore/lacuscore.py,sha256=rFOqUJ0cY58iFK0EwDRSqlSydLjEzUZZiWG5xHCNN_M,39437
|
5
|
-
lacuscore/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
lacuscore/task_logger.py,sha256=8WbdJdKnGeFCxt9gtCNLI9vAQQZbsy2I5PRQpHP7XFU,1916
|
7
|
-
lacuscore-1.10.4.dist-info/LICENSE,sha256=4C4hLYrIkUD96Ggk-y_Go1Qf7PBZrEm9PSeTGe2nd4s,1516
|
8
|
-
lacuscore-1.10.4.dist-info/METADATA,sha256=N0kfRd8E1FjpauTYBnbUsWtQbSejNT8ae9gaNX9Zp0c,2747
|
9
|
-
lacuscore-1.10.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
10
|
-
lacuscore-1.10.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|