scrapling 0.2.99__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- scrapling/__init__.py +18 -31
- scrapling/cli.py +818 -20
- scrapling/core/_html_utils.py +348 -0
- scrapling/core/_types.py +34 -17
- scrapling/core/ai.py +611 -0
- scrapling/core/custom_types.py +183 -100
- scrapling/core/mixins.py +27 -19
- scrapling/core/shell.py +647 -0
- scrapling/core/{storage_adaptors.py → storage.py} +41 -33
- scrapling/core/translator.py +20 -26
- scrapling/core/utils.py +49 -54
- scrapling/engines/__init__.py +15 -6
- scrapling/engines/_browsers/__init__.py +2 -0
- scrapling/engines/_browsers/_camoufox.py +759 -0
- scrapling/engines/_browsers/_config_tools.py +130 -0
- scrapling/engines/_browsers/_controllers.py +644 -0
- scrapling/engines/_browsers/_page.py +93 -0
- scrapling/engines/_browsers/_validators.py +170 -0
- scrapling/engines/constants.py +101 -88
- scrapling/engines/static.py +667 -110
- scrapling/engines/toolbelt/__init__.py +20 -6
- scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js +2 -1
- scrapling/engines/toolbelt/convertor.py +254 -0
- scrapling/engines/toolbelt/custom.py +158 -175
- scrapling/engines/toolbelt/fingerprints.py +32 -46
- scrapling/engines/toolbelt/navigation.py +68 -39
- scrapling/fetchers.py +239 -333
- scrapling/parser.py +781 -449
- scrapling-0.3.1.dist-info/METADATA +411 -0
- scrapling-0.3.1.dist-info/RECORD +41 -0
- {scrapling-0.2.99.dist-info → scrapling-0.3.1.dist-info}/WHEEL +1 -1
- {scrapling-0.2.99.dist-info → scrapling-0.3.1.dist-info}/top_level.txt +0 -1
- scrapling/defaults.py +0 -25
- scrapling/engines/camo.py +0 -339
- scrapling/engines/pw.py +0 -465
- scrapling/engines/toolbelt/bypasses/pdf_viewer.js +0 -5
- scrapling-0.2.99.dist-info/METADATA +0 -290
- scrapling-0.2.99.dist-info/RECORD +0 -49
- tests/__init__.py +0 -1
- tests/fetchers/__init__.py +0 -1
- tests/fetchers/async/__init__.py +0 -0
- tests/fetchers/async/test_camoufox.py +0 -97
- tests/fetchers/async/test_httpx.py +0 -85
- tests/fetchers/async/test_playwright.py +0 -101
- tests/fetchers/sync/__init__.py +0 -0
- tests/fetchers/sync/test_camoufox.py +0 -70
- tests/fetchers/sync/test_httpx.py +0 -84
- tests/fetchers/sync/test_playwright.py +0 -89
- tests/fetchers/test_utils.py +0 -97
- tests/parser/__init__.py +0 -0
- tests/parser/test_automatch.py +0 -111
- tests/parser/test_general.py +0 -330
- {scrapling-0.2.99.dist-info → scrapling-0.3.1.dist-info}/entry_points.txt +0 -0
- {scrapling-0.2.99.dist-info → scrapling-0.3.1.dist-info}/licenses/LICENSE +0 -0
scrapling/core/custom_types.py
CHANGED
@@ -1,159 +1,198 @@
|
|
1
|
-
import re
|
2
|
-
import typing
|
3
1
|
from collections.abc import Mapping
|
4
2
|
from types import MappingProxyType
|
3
|
+
from re import compile as re_compile, UNICODE, IGNORECASE
|
5
4
|
|
6
5
|
from orjson import dumps, loads
|
7
|
-
from w3lib.html import replace_entities as _replace_entities
|
8
6
|
|
9
|
-
from scrapling.core._types import (
|
10
|
-
|
11
|
-
|
7
|
+
from scrapling.core._types import (
|
8
|
+
cast,
|
9
|
+
Dict,
|
10
|
+
List,
|
11
|
+
Union,
|
12
|
+
overload,
|
13
|
+
TypeVar,
|
14
|
+
Literal,
|
15
|
+
Pattern,
|
16
|
+
Iterable,
|
17
|
+
Optional,
|
18
|
+
Generator,
|
19
|
+
SupportsIndex,
|
20
|
+
)
|
21
|
+
from scrapling.core.utils import _is_iterable, flatten, __CONSECUTIVE_SPACES_REGEX__
|
22
|
+
from scrapling.core._html_utils import _replace_entities
|
12
23
|
|
13
24
|
# Define type variable for AttributesHandler value type
|
14
|
-
_TextHandlerType = TypeVar(
|
25
|
+
_TextHandlerType = TypeVar("_TextHandlerType", bound="TextHandler")
|
26
|
+
__CLEANING_TABLE__ = str.maketrans("\t\r\n", " ")
|
15
27
|
|
16
28
|
|
17
29
|
class TextHandler(str):
|
18
30
|
"""Extends standard Python string by adding more functionality"""
|
19
|
-
__slots__ = ()
|
20
31
|
|
21
|
-
|
22
|
-
return super().__new__(cls, str(string))
|
32
|
+
__slots__ = ()
|
23
33
|
|
24
|
-
def __getitem__(
|
34
|
+
def __getitem__(
|
35
|
+
self, key: SupportsIndex | slice
|
36
|
+
) -> "TextHandler": # pragma: no cover
|
25
37
|
lst = super().__getitem__(key)
|
26
|
-
return
|
38
|
+
return cast(_TextHandlerType, TextHandler(lst))
|
27
39
|
|
28
|
-
def split(
|
40
|
+
def split(
|
41
|
+
self, sep: str = None, maxsplit: SupportsIndex = -1
|
42
|
+
) -> "TextHandlers": # pragma: no cover
|
29
43
|
return TextHandlers(
|
30
|
-
|
44
|
+
cast(
|
45
|
+
List[_TextHandlerType],
|
46
|
+
[TextHandler(s) for s in super().split(sep, maxsplit)],
|
47
|
+
)
|
31
48
|
)
|
32
49
|
|
33
|
-
def strip(self, chars: str = None) -> Union[str,
|
50
|
+
def strip(self, chars: str = None) -> Union[str, "TextHandler"]: # pragma: no cover
|
34
51
|
return TextHandler(super().strip(chars))
|
35
52
|
|
36
|
-
def lstrip(
|
53
|
+
def lstrip(
|
54
|
+
self, chars: str = None
|
55
|
+
) -> Union[str, "TextHandler"]: # pragma: no cover
|
37
56
|
return TextHandler(super().lstrip(chars))
|
38
57
|
|
39
|
-
def rstrip(
|
58
|
+
def rstrip(
|
59
|
+
self, chars: str = None
|
60
|
+
) -> Union[str, "TextHandler"]: # pragma: no cover
|
40
61
|
return TextHandler(super().rstrip(chars))
|
41
62
|
|
42
|
-
def capitalize(self) -> Union[str,
|
63
|
+
def capitalize(self) -> Union[str, "TextHandler"]: # pragma: no cover
|
43
64
|
return TextHandler(super().capitalize())
|
44
65
|
|
45
|
-
def casefold(self) -> Union[str,
|
66
|
+
def casefold(self) -> Union[str, "TextHandler"]: # pragma: no cover
|
46
67
|
return TextHandler(super().casefold())
|
47
68
|
|
48
|
-
def center(
|
69
|
+
def center(
|
70
|
+
self, width: SupportsIndex, fillchar: str = " "
|
71
|
+
) -> Union[str, "TextHandler"]: # pragma: no cover
|
49
72
|
return TextHandler(super().center(width, fillchar))
|
50
73
|
|
51
|
-
def expandtabs(
|
74
|
+
def expandtabs(
|
75
|
+
self, tabsize: SupportsIndex = 8
|
76
|
+
) -> Union[str, "TextHandler"]: # pragma: no cover
|
52
77
|
return TextHandler(super().expandtabs(tabsize))
|
53
78
|
|
54
|
-
def format(
|
79
|
+
def format(
|
80
|
+
self, *args: str, **kwargs: str
|
81
|
+
) -> Union[str, "TextHandler"]: # pragma: no cover
|
55
82
|
return TextHandler(super().format(*args, **kwargs))
|
56
83
|
|
57
|
-
def format_map(self, mapping) -> Union[str,
|
84
|
+
def format_map(self, mapping) -> Union[str, "TextHandler"]: # pragma: no cover
|
58
85
|
return TextHandler(super().format_map(mapping))
|
59
86
|
|
60
|
-
def join(
|
87
|
+
def join(
|
88
|
+
self, iterable: Iterable[str]
|
89
|
+
) -> Union[str, "TextHandler"]: # pragma: no cover
|
61
90
|
return TextHandler(super().join(iterable))
|
62
91
|
|
63
|
-
def ljust(
|
92
|
+
def ljust(
|
93
|
+
self, width: SupportsIndex, fillchar: str = " "
|
94
|
+
) -> Union[str, "TextHandler"]: # pragma: no cover
|
64
95
|
return TextHandler(super().ljust(width, fillchar))
|
65
96
|
|
66
|
-
def rjust(
|
97
|
+
def rjust(
|
98
|
+
self, width: SupportsIndex, fillchar: str = " "
|
99
|
+
) -> Union[str, "TextHandler"]: # pragma: no cover
|
67
100
|
return TextHandler(super().rjust(width, fillchar))
|
68
101
|
|
69
|
-
def swapcase(self) -> Union[str,
|
102
|
+
def swapcase(self) -> Union[str, "TextHandler"]: # pragma: no cover
|
70
103
|
return TextHandler(super().swapcase())
|
71
104
|
|
72
|
-
def title(self) -> Union[str,
|
105
|
+
def title(self) -> Union[str, "TextHandler"]: # pragma: no cover
|
73
106
|
return TextHandler(super().title())
|
74
107
|
|
75
|
-
def translate(self, table) -> Union[str,
|
108
|
+
def translate(self, table) -> Union[str, "TextHandler"]: # pragma: no cover
|
76
109
|
return TextHandler(super().translate(table))
|
77
110
|
|
78
|
-
def zfill(
|
111
|
+
def zfill(
|
112
|
+
self, width: SupportsIndex
|
113
|
+
) -> Union[str, "TextHandler"]: # pragma: no cover
|
79
114
|
return TextHandler(super().zfill(width))
|
80
115
|
|
81
|
-
def replace(
|
116
|
+
def replace(
|
117
|
+
self, old: str, new: str, count: SupportsIndex = -1
|
118
|
+
) -> Union[str, "TextHandler"]:
|
82
119
|
return TextHandler(super().replace(old, new, count))
|
83
120
|
|
84
|
-
def upper(self) -> Union[str,
|
121
|
+
def upper(self) -> Union[str, "TextHandler"]:
|
85
122
|
return TextHandler(super().upper())
|
86
123
|
|
87
|
-
def lower(self) -> Union[str,
|
124
|
+
def lower(self) -> Union[str, "TextHandler"]:
|
88
125
|
return TextHandler(super().lower())
|
126
|
+
|
89
127
|
##############
|
90
128
|
|
91
|
-
def sort(self, reverse: bool = False) -> Union[str,
|
129
|
+
def sort(self, reverse: bool = False) -> Union[str, "TextHandler"]:
|
92
130
|
"""Return a sorted version of the string"""
|
93
131
|
return self.__class__("".join(sorted(self, reverse=reverse)))
|
94
132
|
|
95
|
-
def clean(self) -> Union[str,
|
133
|
+
def clean(self) -> Union[str, "TextHandler"]:
|
96
134
|
"""Return a new version of the string with tabs, carriage returns, and newlines replaced by spaces, consecutive spaces collapsed into one, and leading/trailing whitespace stripped"""
|
97
|
-
data =
|
98
|
-
|
99
|
-
return self.__class__(data.strip())
|
135
|
+
data = self.translate(__CLEANING_TABLE__)
|
136
|
+
return self.__class__(__CONSECUTIVE_SPACES_REGEX__.sub(" ", data).strip())
|
100
137
|
|
101
138
|
# For easy copy-paste from Scrapy/parsel code when needed :)
|
102
|
-
def get(self, default=None):
|
139
|
+
def get(self, default=None): # pragma: no cover
|
103
140
|
return self
|
104
141
|
|
105
|
-
def get_all(self):
|
142
|
+
def get_all(self): # pragma: no cover
|
106
143
|
return self
|
107
144
|
|
108
145
|
extract = get_all
|
109
146
|
extract_first = get
|
110
147
|
|
111
148
|
def json(self) -> Dict:
|
112
|
-
"""Return
|
113
|
-
# Using str function as a workaround for orjson issue with subclasses of str
|
149
|
+
"""Return the parsed JSON if the text is valid JSON; otherwise raise an error"""
|
150
|
+
# Using str function as a workaround for orjson issue with subclasses of str.
|
114
151
|
# Check this out: https://github.com/ijl/orjson/issues/445
|
115
152
|
return loads(str(self))
|
116
153
|
|
117
|
-
@
|
154
|
+
@overload
|
118
155
|
def re(
|
119
156
|
self,
|
120
|
-
regex:
|
157
|
+
regex: str | Pattern,
|
121
158
|
check_match: Literal[True],
|
122
159
|
replace_entities: bool = True,
|
123
160
|
clean_match: bool = False,
|
124
161
|
case_sensitive: bool = True,
|
125
|
-
) -> bool:
|
126
|
-
...
|
162
|
+
) -> bool: ...
|
127
163
|
|
128
|
-
@
|
164
|
+
@overload
|
129
165
|
def re(
|
130
166
|
self,
|
131
|
-
regex:
|
167
|
+
regex: str | Pattern,
|
132
168
|
replace_entities: bool = True,
|
133
169
|
clean_match: bool = False,
|
134
170
|
case_sensitive: bool = True,
|
135
171
|
check_match: Literal[False] = False,
|
136
|
-
) -> "TextHandlers[TextHandler]":
|
137
|
-
...
|
172
|
+
) -> "TextHandlers[TextHandler]": ...
|
138
173
|
|
139
174
|
def re(
|
140
|
-
|
141
|
-
|
142
|
-
|
175
|
+
self,
|
176
|
+
regex: str | Pattern,
|
177
|
+
replace_entities: bool = True,
|
178
|
+
clean_match: bool = False,
|
179
|
+
case_sensitive: bool = True,
|
180
|
+
check_match: bool = False,
|
181
|
+
) -> Union["TextHandlers", bool]:
|
143
182
|
"""Apply the given regex to the current text and return a list of strings with the matches.
|
144
183
|
|
145
184
|
:param regex: Can be either a compiled regular expression or a string.
|
146
|
-
:param replace_entities:
|
147
|
-
:param clean_match:
|
148
|
-
:param case_sensitive:
|
149
|
-
:param check_match:
|
185
|
+
:param replace_entities: If enabled character entity references are replaced by their corresponding character
|
186
|
+
:param clean_match: If enabled, this will ignore all whitespaces and consecutive spaces while matching
|
187
|
+
:param case_sensitive: If disabled, function will set the regex to ignore the letters-case while compiling it
|
188
|
+
:param check_match: Used to quickly check if this regex matches or not without any operations on the results
|
150
189
|
|
151
190
|
"""
|
152
191
|
if isinstance(regex, str):
|
153
192
|
if case_sensitive:
|
154
|
-
regex =
|
193
|
+
regex = re_compile(regex, UNICODE)
|
155
194
|
else:
|
156
|
-
regex =
|
195
|
+
regex = re_compile(regex, flags=UNICODE | IGNORECASE)
|
157
196
|
|
158
197
|
input_text = self.clean() if clean_match else self
|
159
198
|
results = regex.findall(input_text)
|
@@ -164,22 +203,42 @@ class TextHandler(str):
|
|
164
203
|
results = flatten(results)
|
165
204
|
|
166
205
|
if not replace_entities:
|
167
|
-
return TextHandlers(
|
206
|
+
return TextHandlers(
|
207
|
+
cast(
|
208
|
+
List[_TextHandlerType], [TextHandler(string) for string in results]
|
209
|
+
)
|
210
|
+
)
|
168
211
|
|
169
|
-
return TextHandlers(
|
212
|
+
return TextHandlers(
|
213
|
+
cast(
|
214
|
+
List[_TextHandlerType],
|
215
|
+
[TextHandler(_replace_entities(s)) for s in results],
|
216
|
+
)
|
217
|
+
)
|
170
218
|
|
171
|
-
def re_first(
|
172
|
-
|
219
|
+
def re_first(
|
220
|
+
self,
|
221
|
+
regex: str | Pattern,
|
222
|
+
default=None,
|
223
|
+
replace_entities: bool = True,
|
224
|
+
clean_match: bool = False,
|
225
|
+
case_sensitive: bool = True,
|
226
|
+
) -> "TextHandler":
|
173
227
|
"""Apply the given regex to text and return the first match if found, otherwise return the default value.
|
174
228
|
|
175
229
|
:param regex: Can be either a compiled regular expression or a string.
|
176
230
|
:param default: The default value to be returned if there is no match
|
177
|
-
:param replace_entities:
|
178
|
-
:param clean_match:
|
179
|
-
:param case_sensitive:
|
231
|
+
:param replace_entities: If enabled character entity references are replaced by their corresponding character
|
232
|
+
:param clean_match: If enabled, this will ignore all whitespaces and consecutive spaces while matching
|
233
|
+
:param case_sensitive: If disabled, function will set the regex to ignore the letters-case while compiling it
|
180
234
|
|
181
235
|
"""
|
182
|
-
result = self.re(
|
236
|
+
result = self.re(
|
237
|
+
regex,
|
238
|
+
replace_entities,
|
239
|
+
clean_match=clean_match,
|
240
|
+
case_sensitive=case_sensitive,
|
241
|
+
)
|
183
242
|
return result[0] if result else default
|
184
243
|
|
185
244
|
|
@@ -187,48 +246,61 @@ class TextHandlers(List[TextHandler]):
|
|
187
246
|
"""
|
188
247
|
The :class:`TextHandlers` class is a subclass of the builtin ``List`` class, which provides a few additional methods.
|
189
248
|
"""
|
249
|
+
|
190
250
|
__slots__ = ()
|
191
251
|
|
192
|
-
@
|
193
|
-
def __getitem__(self, pos: SupportsIndex) -> TextHandler:
|
252
|
+
@overload
|
253
|
+
def __getitem__(self, pos: SupportsIndex) -> TextHandler: # pragma: no cover
|
194
254
|
pass
|
195
255
|
|
196
|
-
@
|
197
|
-
def __getitem__(self, pos: slice) -> "TextHandlers":
|
256
|
+
@overload
|
257
|
+
def __getitem__(self, pos: slice) -> "TextHandlers": # pragma: no cover
|
198
258
|
pass
|
199
259
|
|
200
|
-
def __getitem__(
|
260
|
+
def __getitem__(
|
261
|
+
self, pos: SupportsIndex | slice
|
262
|
+
) -> Union[TextHandler, "TextHandlers"]:
|
201
263
|
lst = super().__getitem__(pos)
|
202
264
|
if isinstance(pos, slice):
|
203
|
-
|
204
|
-
|
205
|
-
return typing.cast(_TextHandlerType, TextHandler(lst))
|
265
|
+
return TextHandlers(cast(List[_TextHandlerType], lst))
|
266
|
+
return cast(_TextHandlerType, TextHandler(lst))
|
206
267
|
|
207
|
-
def re(
|
208
|
-
|
268
|
+
def re(
|
269
|
+
self,
|
270
|
+
regex: str | Pattern,
|
271
|
+
replace_entities: bool = True,
|
272
|
+
clean_match: bool = False,
|
273
|
+
case_sensitive: bool = True,
|
274
|
+
) -> "TextHandlers[TextHandler]":
|
209
275
|
"""Call the ``.re()`` method for each element in this list and return
|
210
276
|
their results flattened as TextHandlers.
|
211
277
|
|
212
278
|
:param regex: Can be either a compiled regular expression or a string.
|
213
|
-
:param replace_entities:
|
279
|
+
:param replace_entities: If enabled character entity references are replaced by their corresponding character
|
214
280
|
:param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
|
215
|
-
:param case_sensitive: if disabled, function will set the regex to ignore letters
|
281
|
+
:param case_sensitive: if disabled, the function will set the regex to ignore the letters-case while compiling it
|
216
282
|
"""
|
217
283
|
results = [
|
218
284
|
n.re(regex, replace_entities, clean_match, case_sensitive) for n in self
|
219
285
|
]
|
220
286
|
return TextHandlers(flatten(results))
|
221
287
|
|
222
|
-
def re_first(
|
223
|
-
|
288
|
+
def re_first(
|
289
|
+
self,
|
290
|
+
regex: str | Pattern,
|
291
|
+
default=None,
|
292
|
+
replace_entities: bool = True,
|
293
|
+
clean_match: bool = False,
|
294
|
+
case_sensitive: bool = True,
|
295
|
+
) -> TextHandler: # pragma: no cover
|
224
296
|
"""Call the ``.re_first()`` method for each element in this list and return
|
225
297
|
the first result or the default value otherwise.
|
226
298
|
|
227
299
|
:param regex: Can be either a compiled regular expression or a string.
|
228
300
|
:param default: The default value to be returned if there is no match
|
229
|
-
:param replace_entities:
|
230
|
-
:param clean_match:
|
231
|
-
:param case_sensitive:
|
301
|
+
:param replace_entities: If enabled character entity references are replaced by their corresponding character
|
302
|
+
:param clean_match: If enabled, this will ignore all whitespaces and consecutive spaces while matching
|
303
|
+
:param case_sensitive: If disabled, function will set the regex to ignore the letters-case while compiling it
|
232
304
|
"""
|
233
305
|
for n in self:
|
234
306
|
for result in n.re(regex, replace_entities, clean_match, case_sensitive):
|
@@ -250,33 +322,44 @@ class TextHandlers(List[TextHandler]):
|
|
250
322
|
|
251
323
|
|
252
324
|
class AttributesHandler(Mapping[str, _TextHandlerType]):
|
253
|
-
"""A read-only mapping to use instead of the standard dictionary for the speed boost but at the same time I use it to add more functionalities.
|
254
|
-
|
325
|
+
"""A read-only mapping used instead of the standard dictionary for the speed boost, while also adding extra functionality.
|
326
|
+
If the standard dictionary is needed, convert this class to a dictionary with the `dict` function
|
255
327
|
"""
|
256
|
-
|
328
|
+
|
329
|
+
__slots__ = ("_data",)
|
257
330
|
|
258
331
|
def __init__(self, mapping=None, **kwargs):
|
259
|
-
mapping =
|
260
|
-
|
261
|
-
|
262
|
-
|
332
|
+
mapping = (
|
333
|
+
{
|
334
|
+
key: TextHandler(value) if isinstance(value, str) else value
|
335
|
+
for key, value in mapping.items()
|
336
|
+
}
|
337
|
+
if mapping is not None
|
338
|
+
else {}
|
339
|
+
)
|
263
340
|
|
264
341
|
if kwargs:
|
265
|
-
mapping.update(
|
266
|
-
|
267
|
-
|
268
|
-
|
342
|
+
mapping.update(
|
343
|
+
{
|
344
|
+
key: TextHandler(value) if isinstance(value, str) else value
|
345
|
+
for key, value in kwargs.items()
|
346
|
+
}
|
347
|
+
)
|
269
348
|
|
270
349
|
# Fastest read-only mapping type
|
271
350
|
self._data = MappingProxyType(mapping)
|
272
351
|
|
273
|
-
def get(
|
274
|
-
|
352
|
+
def get(
|
353
|
+
self, key: str, default: Optional[str] = None
|
354
|
+
) -> Optional[_TextHandlerType]:
|
355
|
+
"""Acts like the standard dictionary `.get()` method"""
|
275
356
|
return self._data.get(key, default)
|
276
357
|
|
277
|
-
def search_values(
|
278
|
-
|
279
|
-
|
358
|
+
def search_values(
|
359
|
+
self, keyword: str, partial: bool = False
|
360
|
+
) -> Generator["AttributesHandler", None, None]:
|
361
|
+
"""Search current attributes by values and return a dictionary of each matching item
|
362
|
+
:param keyword: The keyword to search for in the attribute values
|
280
363
|
:param partial: If True, the function will search if keyword in each value instead of perfect match
|
281
364
|
"""
|
282
365
|
for key, value in self._data.items():
|
scrapling/core/mixins.py
CHANGED
@@ -1,32 +1,37 @@
|
|
1
|
-
|
2
1
|
class SelectorsGeneration:
|
3
|
-
"""
|
2
|
+
"""
|
3
|
+
Functions for generating selectors
|
4
4
|
Trying to generate selectors like Firefox or maybe cleaner ones!? Ehm
|
5
|
-
Inspiration: https://searchfox.org/mozilla-central/source/devtools/shared/inspector/css-logic.js#591
|
5
|
+
Inspiration: https://searchfox.org/mozilla-central/source/devtools/shared/inspector/css-logic.js#591
|
6
|
+
"""
|
6
7
|
|
7
|
-
def __general_selection(
|
8
|
+
def __general_selection(
|
9
|
+
self, selection: str = "css", full_path: bool = False
|
10
|
+
) -> str:
|
8
11
|
"""Generate a selector for the current element.
|
9
12
|
:return: A string of the generated selector.
|
10
13
|
"""
|
11
14
|
selectorPath = []
|
12
15
|
target = self
|
13
|
-
css = selection.lower() ==
|
16
|
+
css = selection.lower() == "css"
|
14
17
|
while target is not None:
|
15
18
|
if target.parent:
|
16
|
-
if target.attrib.get(
|
19
|
+
if target.attrib.get("id"):
|
17
20
|
# id is enough
|
18
21
|
part = (
|
19
|
-
f
|
22
|
+
f"#{target.attrib['id']}"
|
23
|
+
if css
|
20
24
|
else f"[@id='{target.attrib['id']}']"
|
21
25
|
)
|
22
26
|
selectorPath.append(part)
|
23
27
|
if not full_path:
|
24
28
|
return (
|
25
|
-
" > ".join(reversed(selectorPath))
|
26
|
-
|
29
|
+
" > ".join(reversed(selectorPath))
|
30
|
+
if css
|
31
|
+
else "//*" + "/".join(reversed(selectorPath))
|
27
32
|
)
|
28
33
|
else:
|
29
|
-
part = f
|
34
|
+
part = f"{target.tag}"
|
30
35
|
# We won't use classes anymore because some websites share the exact same classes between elements
|
31
36
|
# classes = target.attrib.get('class', '').split()
|
32
37
|
# if classes and css:
|
@@ -41,23 +46,26 @@ class SelectorsGeneration:
|
|
41
46
|
|
42
47
|
if counter[target.tag] > 1:
|
43
48
|
part += (
|
44
|
-
f":nth-of-type({counter[target.tag]})"
|
49
|
+
f":nth-of-type({counter[target.tag]})"
|
50
|
+
if css
|
45
51
|
else f"[{counter[target.tag]}]"
|
46
52
|
)
|
47
53
|
|
48
54
|
selectorPath.append(part)
|
49
55
|
target = target.parent
|
50
|
-
if target is None or target.tag ==
|
56
|
+
if target is None or target.tag == "html":
|
51
57
|
return (
|
52
|
-
" > ".join(reversed(selectorPath))
|
53
|
-
|
58
|
+
" > ".join(reversed(selectorPath))
|
59
|
+
if css
|
60
|
+
else "//" + "/".join(reversed(selectorPath))
|
54
61
|
)
|
55
62
|
else:
|
56
63
|
break
|
57
64
|
|
58
65
|
return (
|
59
|
-
" > ".join(reversed(selectorPath))
|
60
|
-
|
66
|
+
" > ".join(reversed(selectorPath))
|
67
|
+
if css
|
68
|
+
else "//" + "/".join(reversed(selectorPath))
|
61
69
|
)
|
62
70
|
|
63
71
|
@property
|
@@ -76,14 +84,14 @@ class SelectorsGeneration:
|
|
76
84
|
|
77
85
|
@property
|
78
86
|
def generate_xpath_selector(self) -> str:
|
79
|
-
"""Generate
|
87
|
+
"""Generate an XPath selector for the current element
|
80
88
|
:return: A string of the generated selector.
|
81
89
|
"""
|
82
|
-
return self.__general_selection(
|
90
|
+
return self.__general_selection("xpath")
|
83
91
|
|
84
92
|
@property
|
85
93
|
def generate_full_xpath_selector(self) -> str:
|
86
94
|
"""Generate a complete XPath selector for the current element
|
87
95
|
:return: A string of the generated selector.
|
88
96
|
"""
|
89
|
-
return self.__general_selection(
|
97
|
+
return self.__general_selection("xpath", full_path=True)
|