scrapling 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- scrapling/__init__.py +4 -4
- scrapling/core/custom_types.py +88 -6
- scrapling/core/storage_adaptors.py +5 -6
- scrapling/core/translator.py +2 -2
- scrapling/core/utils.py +29 -27
- scrapling/defaults.py +2 -1
- scrapling/engines/camo.py +89 -15
- scrapling/engines/constants.py +4 -4
- scrapling/engines/pw.py +158 -83
- scrapling/engines/static.py +91 -48
- scrapling/engines/toolbelt/__init__.py +3 -3
- scrapling/engines/toolbelt/custom.py +20 -22
- scrapling/engines/toolbelt/fingerprints.py +3 -3
- scrapling/engines/toolbelt/navigation.py +21 -8
- scrapling/fetchers.py +229 -14
- scrapling/parser.py +49 -21
- {scrapling-0.2.8.dist-info → scrapling-0.2.9.dist-info}/METADATA +32 -16
- scrapling-0.2.9.dist-info/RECORD +47 -0
- tests/fetchers/async/__init__.py +0 -0
- tests/fetchers/async/test_camoufox.py +95 -0
- tests/fetchers/async/test_httpx.py +83 -0
- tests/fetchers/async/test_playwright.py +99 -0
- tests/fetchers/sync/__init__.py +0 -0
- tests/fetchers/sync/test_camoufox.py +68 -0
- tests/fetchers/sync/test_httpx.py +82 -0
- tests/fetchers/sync/test_playwright.py +87 -0
- tests/fetchers/test_utils.py +90 -122
- tests/parser/test_automatch.py +64 -9
- tests/parser/test_general.py +260 -218
- scrapling-0.2.8.dist-info/RECORD +0 -42
- tests/fetchers/test_camoufox.py +0 -65
- tests/fetchers/test_httpx.py +0 -68
- tests/fetchers/test_playwright.py +0 -77
- {scrapling-0.2.8.dist-info → scrapling-0.2.9.dist-info}/LICENSE +0 -0
- {scrapling-0.2.8.dist-info → scrapling-0.2.9.dist-info}/WHEEL +0 -0
- {scrapling-0.2.8.dist-info → scrapling-0.2.9.dist-info}/top_level.txt +0 -0
tests/parser/test_general.py
CHANGED
@@ -1,288 +1,330 @@
|
|
1
|
-
|
2
1
|
import pickle
|
3
|
-
import
|
2
|
+
import time
|
4
3
|
|
4
|
+
import pytest
|
5
5
|
from cssselect import SelectorError, SelectorSyntaxError
|
6
6
|
|
7
7
|
from scrapling import Adaptor
|
8
8
|
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
10
|
+
@pytest.fixture
|
11
|
+
def html_content():
|
12
|
+
return '''
|
13
|
+
<html>
|
14
|
+
<head>
|
15
|
+
<title>Complex Web Page</title>
|
16
|
+
<style>
|
17
|
+
.hidden { display: none; }
|
18
|
+
</style>
|
19
|
+
</head>
|
20
|
+
<body>
|
21
|
+
<header>
|
22
|
+
<nav>
|
23
|
+
<ul>
|
24
|
+
<li><a href="#home">Home</a></li>
|
25
|
+
<li><a href="#about">About</a></li>
|
26
|
+
<li><a href="#contact">Contact</a></li>
|
27
|
+
</ul>
|
28
|
+
</nav>
|
29
|
+
</header>
|
30
|
+
<main>
|
31
|
+
<section id="products" schema='{"jsonable": "data"}'>
|
32
|
+
<h2>Products</h2>
|
33
|
+
<div class="product-list">
|
34
|
+
<article class="product" data-id="1">
|
35
|
+
<h3>Product 1</h3>
|
36
|
+
<p class="description">This is product 1</p>
|
37
|
+
<span class="price">$10.99</span>
|
38
|
+
<div class="hidden stock">In stock: 5</div>
|
39
|
+
</article>
|
40
|
+
<article class="product" data-id="2">
|
41
|
+
<h3>Product 2</h3>
|
42
|
+
<p class="description">This is product 2</p>
|
43
|
+
<span class="price">$20.99</span>
|
44
|
+
<div class="hidden stock">In stock: 3</div>
|
45
|
+
</article>
|
46
|
+
<article class="product" data-id="3">
|
47
|
+
<h3>Product 3</h3>
|
48
|
+
<p class="description">This is product 3</p>
|
49
|
+
<span class="price">$15.99</span>
|
50
|
+
<div class="hidden stock">Out of stock</div>
|
51
|
+
</article>
|
52
|
+
</div>
|
53
|
+
</section>
|
54
|
+
<section id="reviews">
|
55
|
+
<h2>Customer Reviews</h2>
|
56
|
+
<div class="review-list">
|
57
|
+
<div class="review" data-rating="5">
|
58
|
+
<p class="review-text">Great product!</p>
|
59
|
+
<span class="reviewer">John Doe</span>
|
52
60
|
</div>
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
<div class="review-list">
|
57
|
-
<div class="review" data-rating="5">
|
58
|
-
<p class="review-text">Great product!</p>
|
59
|
-
<span class="reviewer">John Doe</span>
|
60
|
-
</div>
|
61
|
-
<div class="review" data-rating="4">
|
62
|
-
<p class="review-text">Good value for money.</p>
|
63
|
-
<span class="reviewer">Jane Smith</span>
|
64
|
-
</div>
|
61
|
+
<div class="review" data-rating="4">
|
62
|
+
<p class="review-text">Good value for money.</p>
|
63
|
+
<span class="reviewer">Jane Smith</span>
|
65
64
|
</div>
|
66
|
-
</
|
67
|
-
</
|
68
|
-
|
69
|
-
|
70
|
-
</
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
</
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
65
|
+
</div>
|
66
|
+
</section>
|
67
|
+
</main>
|
68
|
+
<footer>
|
69
|
+
<p>© 2024 Our Company</p>
|
70
|
+
</footer>
|
71
|
+
<script id="page-data" type="application/json">
|
72
|
+
{"lastUpdated": "2024-09-22T10:30:00Z", "totalProducts": 3}
|
73
|
+
</script>
|
74
|
+
</body>
|
75
|
+
</html>
|
76
|
+
'''
|
77
|
+
|
78
|
+
|
79
|
+
@pytest.fixture
|
80
|
+
def page(html_content):
|
81
|
+
return Adaptor(html_content, auto_match=False)
|
82
|
+
|
83
|
+
|
84
|
+
# CSS Selector Tests
|
85
|
+
class TestCSSSelectors:
|
86
|
+
def test_basic_product_selection(self, page):
|
87
|
+
"""Test selecting all product elements"""
|
88
|
+
elements = page.css('main #products .product-list article.product')
|
89
|
+
assert len(elements) == 3
|
90
|
+
|
91
|
+
def test_in_stock_product_selection(self, page):
|
92
|
+
"""Test selecting in-stock products"""
|
93
|
+
in_stock_products = page.css(
|
85
94
|
'main #products .product-list article.product:not(:contains("Out of stock"))')
|
86
|
-
|
95
|
+
assert len(in_stock_products) == 2
|
96
|
+
|
87
97
|
|
88
|
-
|
89
|
-
|
90
|
-
|
98
|
+
# XPath Selector Tests
|
99
|
+
class TestXPathSelectors:
|
100
|
+
def test_high_rating_reviews(self, page):
|
101
|
+
"""Test selecting reviews with high ratings"""
|
102
|
+
reviews = page.xpath(
|
91
103
|
'//section[@id="reviews"]//div[contains(@class, "review") and @data-rating >= 4]'
|
92
104
|
)
|
93
|
-
|
105
|
+
assert len(reviews) == 2
|
94
106
|
|
95
|
-
|
107
|
+
def test_high_priced_products(self, page):
|
108
|
+
"""Test selecting products above a certain price"""
|
109
|
+
high_priced_products = page.xpath(
|
96
110
|
'//article[contains(@class, "product")]'
|
97
111
|
'[number(translate(substring-after(.//span[@class="price"], "$"), ",", "")) > 15]'
|
98
112
|
)
|
99
|
-
|
113
|
+
assert len(high_priced_products) == 2
|
114
|
+
|
115
|
+
|
116
|
+
# Text Matching Tests
|
117
|
+
class TestTextMatching:
|
118
|
+
def test_regex_multiple_matches(self, page):
|
119
|
+
"""Test finding multiple matches with regex"""
|
120
|
+
stock_info = page.find_by_regex(r'In stock: \d+', first_match=False)
|
121
|
+
assert len(stock_info) == 2
|
100
122
|
|
101
|
-
def
|
102
|
-
"""Test
|
103
|
-
stock_info =
|
104
|
-
|
123
|
+
def test_regex_first_match(self, page):
|
124
|
+
"""Test finding the first match with regex"""
|
125
|
+
stock_info = page.find_by_regex(r'In stock: \d+', first_match=True, case_sensitive=True)
|
126
|
+
assert stock_info.text == 'In stock: 5'
|
105
127
|
|
106
|
-
|
107
|
-
|
128
|
+
def test_partial_text_match(self, page):
|
129
|
+
"""Test finding elements with partial text match"""
|
130
|
+
stock_info = page.find_by_text(r'In stock:', partial=True, first_match=False)
|
131
|
+
assert len(stock_info) == 2
|
108
132
|
|
109
|
-
|
110
|
-
|
133
|
+
def test_exact_text_match(self, page):
|
134
|
+
"""Test finding elements with exact text match"""
|
135
|
+
out_of_stock = page.find_by_text('Out of stock', partial=False, first_match=False)
|
136
|
+
assert len(out_of_stock) == 1
|
111
137
|
|
112
|
-
out_of_stock = self.page.find_by_text('Out of stock', partial=False, first_match=False)
|
113
|
-
self.assertEqual(len(out_of_stock), 1)
|
114
138
|
|
115
|
-
|
116
|
-
|
117
|
-
|
139
|
+
# Similar Elements Tests
|
140
|
+
class TestSimilarElements:
|
141
|
+
def test_finding_similar_products(self, page):
|
142
|
+
"""Test finding similar product elements"""
|
143
|
+
first_product = page.css_first('.product')
|
118
144
|
similar_products = first_product.find_similar()
|
119
|
-
|
145
|
+
assert len(similar_products) == 2
|
120
146
|
|
121
|
-
|
147
|
+
def test_finding_similar_reviews(self, page):
|
148
|
+
"""Test finding similar review elements with additional filtering"""
|
149
|
+
first_review = page.find('div', class_='review')
|
122
150
|
similar_high_rated_reviews = [
|
123
151
|
review
|
124
152
|
for review in first_review.find_similar()
|
125
153
|
if int(review.attrib.get('data-rating', 0)) >= 4
|
126
154
|
]
|
127
|
-
|
155
|
+
assert len(similar_high_rated_reviews) == 1
|
128
156
|
|
129
|
-
|
130
|
-
|
131
|
-
|
157
|
+
|
158
|
+
# Error Handling Tests
|
159
|
+
class TestErrorHandling:
|
160
|
+
def test_invalid_adaptor_initialization(self):
|
161
|
+
"""Test various invalid Adaptor initializations"""
|
162
|
+
# No arguments
|
163
|
+
with pytest.raises(ValueError):
|
132
164
|
_ = Adaptor(auto_match=False)
|
133
165
|
|
134
|
-
|
166
|
+
# Invalid argument types
|
167
|
+
with pytest.raises(TypeError):
|
135
168
|
_ = Adaptor(root="ayo", auto_match=False)
|
136
169
|
|
137
|
-
with
|
170
|
+
with pytest.raises(TypeError):
|
138
171
|
_ = Adaptor(text=1, auto_match=False)
|
139
172
|
|
140
|
-
with
|
173
|
+
with pytest.raises(TypeError):
|
141
174
|
_ = Adaptor(body=1, auto_match=False)
|
142
175
|
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
"""Test that objects aren't pickleable"""
|
148
|
-
table = self.page.css('.product-list')[0]
|
149
|
-
with self.assertRaises(TypeError): # Adaptors
|
150
|
-
pickle.dumps(table)
|
151
|
-
|
152
|
-
with self.assertRaises(TypeError): # Adaptor
|
153
|
-
pickle.dumps(table[0])
|
154
|
-
|
155
|
-
def test_overridden(self):
|
156
|
-
"""Test overridden functions"""
|
157
|
-
table = self.page.css('.product-list')[0]
|
158
|
-
self.assertTrue(issubclass(type(table.__str__()), str))
|
159
|
-
self.assertTrue(issubclass(type(table.__repr__()), str))
|
160
|
-
self.assertTrue(issubclass(type(table.attrib.__str__()), str))
|
161
|
-
self.assertTrue(issubclass(type(table.attrib.__repr__()), str))
|
162
|
-
|
163
|
-
def test_bad_selector(self):
|
164
|
-
"""Test object can handle bad selector"""
|
165
|
-
with self.assertRaises((SelectorError, SelectorSyntaxError,)):
|
166
|
-
self.page.css('4 ayo')
|
176
|
+
def test_invalid_storage(self, page, html_content):
|
177
|
+
"""Test invalid storage parameter"""
|
178
|
+
with pytest.raises(ValueError):
|
179
|
+
_ = Adaptor(html_content, storage=object, auto_match=True)
|
167
180
|
|
168
|
-
|
169
|
-
|
181
|
+
def test_bad_selectors(self, page):
|
182
|
+
"""Test handling of invalid selectors"""
|
183
|
+
with pytest.raises((SelectorError, SelectorSyntaxError)):
|
184
|
+
page.css('4 ayo')
|
170
185
|
|
171
|
-
|
172
|
-
|
173
|
-
def _traverse(element: Adaptor):
|
174
|
-
self.assertTrue(type(element.generate_css_selector) is str)
|
175
|
-
self.assertTrue(type(element.generate_xpath_selector) is str)
|
176
|
-
for branch in element.children:
|
177
|
-
_traverse(branch)
|
186
|
+
with pytest.raises((SelectorError, SelectorSyntaxError)):
|
187
|
+
page.xpath('4 ayo')
|
178
188
|
|
179
|
-
_traverse(self.page)
|
180
189
|
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
190
|
+
# Pickling and Object Representation Tests
|
191
|
+
class TestPicklingAndRepresentation:
|
192
|
+
def test_unpickleable_objects(self, page):
|
193
|
+
"""Test that Adaptor objects cannot be pickled"""
|
194
|
+
table = page.css('.product-list')[0]
|
195
|
+
with pytest.raises(TypeError):
|
196
|
+
pickle.dumps(table)
|
188
197
|
|
189
|
-
|
190
|
-
|
191
|
-
self.assertNotEqual(table.prettify(), '')
|
198
|
+
with pytest.raises(TypeError):
|
199
|
+
pickle.dumps(table[0])
|
192
200
|
|
201
|
+
def test_string_representations(self, page):
|
202
|
+
"""Test custom string representations of objects"""
|
203
|
+
table = page.css('.product-list')[0]
|
204
|
+
assert issubclass(type(table.__str__()), str)
|
205
|
+
assert issubclass(type(table.__repr__()), str)
|
206
|
+
assert issubclass(type(table.attrib.__str__()), str)
|
207
|
+
assert issubclass(type(table.attrib.__repr__()), str)
|
208
|
+
|
209
|
+
|
210
|
+
# Navigation and Traversal Tests
|
211
|
+
class TestElementNavigation:
|
212
|
+
def test_basic_navigation_properties(self, page):
|
213
|
+
"""Test basic navigation properties of elements"""
|
214
|
+
table = page.css('.product-list')[0]
|
215
|
+
assert table.path is not None
|
216
|
+
assert table.html_content != ''
|
217
|
+
assert table.prettify() != ''
|
218
|
+
|
219
|
+
def test_parent_and_sibling_navigation(self, page):
|
220
|
+
"""Test parent and sibling navigation"""
|
221
|
+
table = page.css('.product-list')[0]
|
193
222
|
parent = table.parent
|
194
|
-
|
195
|
-
|
196
|
-
children = table.children
|
197
|
-
self.assertEqual(len(children), 3)
|
223
|
+
assert parent.attrib['id'] == 'products'
|
198
224
|
|
199
225
|
parent_siblings = parent.siblings
|
200
|
-
|
226
|
+
assert len(parent_siblings) == 1
|
227
|
+
|
228
|
+
def test_child_navigation(self, page):
|
229
|
+
"""Test child navigation"""
|
230
|
+
table = page.css('.product-list')[0]
|
231
|
+
children = table.children
|
232
|
+
assert len(children) == 3
|
201
233
|
|
202
|
-
|
234
|
+
def test_next_and_previous_navigation(self, page):
|
235
|
+
"""Test next and previous element navigation"""
|
236
|
+
child = page.css('.product-list')[0].find({'data-id': "1"})
|
203
237
|
next_element = child.next
|
204
|
-
|
238
|
+
assert next_element.attrib['data-id'] == '2'
|
205
239
|
|
206
240
|
prev_element = next_element.previous
|
207
|
-
|
241
|
+
assert prev_element.tag == child.tag
|
208
242
|
|
209
|
-
|
243
|
+
def test_ancestor_finding(self, page):
|
244
|
+
"""Test finding ancestors of elements"""
|
245
|
+
all_prices = page.css('.price')
|
210
246
|
products_with_prices = [
|
211
247
|
price.find_ancestor(lambda p: p.has_class('product'))
|
212
248
|
for price in all_prices
|
213
249
|
]
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
soup = Adaptor(test_html, auto_match=False, keep_comments=False)
|
224
|
-
html_tag = soup.css('html')[0]
|
225
|
-
self.assertEqual(html_tag.path, [])
|
226
|
-
self.assertEqual(html_tag.siblings, [])
|
227
|
-
self.assertEqual(html_tag.parent, None)
|
228
|
-
self.assertEqual(html_tag.find_ancestor(lambda e: e), None)
|
229
|
-
|
230
|
-
self.assertEqual(soup.css('#a a')[0].next, None)
|
231
|
-
self.assertEqual(soup.css('#b a')[0].previous, None)
|
232
|
-
|
233
|
-
def test_text_to_json(self):
|
234
|
-
"""Test converting text to json"""
|
235
|
-
script_content = self.page.css('#page-data::text')[0]
|
236
|
-
self.assertTrue(issubclass(type(script_content.sort()), str))
|
250
|
+
assert len(products_with_prices) == 3
|
251
|
+
|
252
|
+
|
253
|
+
# JSON and Attribute Tests
|
254
|
+
class TestJSONAndAttributes:
|
255
|
+
def test_json_conversion(self, page):
|
256
|
+
"""Test converting content to JSON"""
|
257
|
+
script_content = page.css('#page-data::text')[0]
|
258
|
+
assert issubclass(type(script_content.sort()), str)
|
237
259
|
page_data = script_content.json()
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
def
|
242
|
-
"""Test
|
243
|
-
|
244
|
-
|
245
|
-
self.assertEqual(match, '10.99')
|
246
|
-
match = element.text.re(r'(\d+)', replace_entities=False)
|
247
|
-
self.assertEqual(len(match), 2)
|
248
|
-
|
249
|
-
def test_attribute_operations(self):
|
250
|
-
"""Test operations on elements attributes"""
|
251
|
-
products = self.page.css('.product')
|
260
|
+
assert page_data['totalProducts'] == 3
|
261
|
+
assert 'lastUpdated' in page_data
|
262
|
+
|
263
|
+
def test_attribute_operations(self, page):
|
264
|
+
"""Test various attribute-related operations"""
|
265
|
+
# Product ID extraction
|
266
|
+
products = page.css('.product')
|
252
267
|
product_ids = [product.attrib['data-id'] for product in products]
|
253
|
-
|
254
|
-
|
268
|
+
assert product_ids == ['1', '2', '3']
|
269
|
+
assert 'data-id' in products[0].attrib
|
255
270
|
|
256
|
-
|
271
|
+
# Review rating calculations
|
272
|
+
reviews = page.css('.review')
|
257
273
|
review_ratings = [int(review.attrib['data-rating']) for review in reviews]
|
258
|
-
|
274
|
+
assert sum(review_ratings) / len(review_ratings) == 4.5
|
259
275
|
|
276
|
+
# Attribute searching
|
260
277
|
key_value = list(products[0].attrib.search_values('1', partial=False))
|
261
|
-
|
278
|
+
assert list(key_value[0].keys()) == ['data-id']
|
262
279
|
|
263
280
|
key_value = list(products[0].attrib.search_values('1', partial=True))
|
264
|
-
|
281
|
+
assert list(key_value[0].keys()) == ['data-id']
|
282
|
+
|
283
|
+
# JSON attribute conversion
|
284
|
+
attr_json = page.css_first('#products').attrib['schema'].json()
|
285
|
+
assert attr_json == {'jsonable': 'data'}
|
286
|
+
assert isinstance(page.css('#products')[0].attrib.json_string, bytes)
|
287
|
+
|
288
|
+
|
289
|
+
# Performance Test
|
290
|
+
def test_large_html_parsing_performance():
|
291
|
+
"""Test parsing and selecting performance on large HTML"""
|
292
|
+
large_html = '<html><body>' + '<div class="item">' * 5000 + '</div>' * 5000 + '</body></html>'
|
293
|
+
|
294
|
+
start_time = time.time()
|
295
|
+
parsed = Adaptor(large_html, auto_match=False)
|
296
|
+
elements = parsed.css('.item')
|
297
|
+
end_time = time.time()
|
298
|
+
|
299
|
+
assert len(elements) == 5000
|
300
|
+
# Converting 5000 elements to a class and doing operations on them will take time
|
301
|
+
# Based on my tests with 100 runs, 1 loop each Scrapling (given the extra work/features) takes 10.4ms on average
|
302
|
+
assert end_time - start_time < 0.5 # Locally I test on 0.1 but on GitHub actions with browsers and threading sometimes closing adds fractions of seconds
|
303
|
+
|
304
|
+
|
305
|
+
# Selector Generation Test
|
306
|
+
def test_selectors_generation(page):
|
307
|
+
"""Try to create selectors for all elements in the page"""
|
265
308
|
|
266
|
-
|
267
|
-
|
268
|
-
|
309
|
+
def _traverse(element: Adaptor):
|
310
|
+
assert isinstance(element.generate_css_selector, str)
|
311
|
+
assert isinstance(element.generate_xpath_selector, str)
|
312
|
+
for branch in element.children:
|
313
|
+
_traverse(branch)
|
269
314
|
|
270
|
-
|
271
|
-
"""Test parsing and selecting speed"""
|
272
|
-
import time
|
273
|
-
large_html = '<html><body>' + '<div class="item">' * 5000 + '</div>' * 5000 + '</body></html>'
|
315
|
+
_traverse(page)
|
274
316
|
|
275
|
-
start_time = time.time()
|
276
|
-
parsed = Adaptor(large_html, auto_match=False, debug=False)
|
277
|
-
elements = parsed.css('.item')
|
278
|
-
end_time = time.time()
|
279
317
|
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
318
|
+
# Miscellaneous Tests
|
319
|
+
def test_getting_all_text(page):
|
320
|
+
"""Test getting all text from the page"""
|
321
|
+
assert page.get_all_text() != ''
|
284
322
|
|
285
323
|
|
286
|
-
|
287
|
-
|
288
|
-
|
324
|
+
def test_regex_on_text(page):
|
325
|
+
"""Test regex operations on text"""
|
326
|
+
element = page.css('[data-id="1"] .price')[0]
|
327
|
+
match = element.re_first(r'[\.\d]+')
|
328
|
+
assert match == '10.99'
|
329
|
+
match = element.text.re(r'(\d+)', replace_entities=False)
|
330
|
+
assert len(match) == 2
|
scrapling-0.2.8.dist-info/RECORD
DELETED
@@ -1,42 +0,0 @@
|
|
1
|
-
scrapling/__init__.py,sha256=0-gw4uqckCs7ikl6sHiB5c6y0AelpgefqJkBmSd7j1k,469
|
2
|
-
scrapling/defaults.py,sha256=qO6zAS7k5_QXvbjuoBv87fUMqASGMuM2dVry9J9auv0,287
|
3
|
-
scrapling/fetchers.py,sha256=iw1wEuFg14akJYpSg9webfBjAL341Pnofn4IkWahGlE,17486
|
4
|
-
scrapling/parser.py,sha256=suXggr39GimLnnLm9ivM1CQ40AoDwGke2sgnWszqFqk,54331
|
5
|
-
scrapling/py.typed,sha256=frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN_XKdLCPjaYaY,2
|
6
|
-
scrapling/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
scrapling/core/_types.py,sha256=__HJ2JTk5vx5eg_7HAJmDjaHrMDIaoxNG8fadLLyKV8,566
|
8
|
-
scrapling/core/custom_types.py,sha256=8GCgcZL-IT5lP6titxL-RPCiItQSuJZjSlFIGCDxoSs,8402
|
9
|
-
scrapling/core/mixins.py,sha256=sozbpaGL1_O_x3U-ABM5aYWpnxpCLfdbcA9SG3P7weY,3532
|
10
|
-
scrapling/core/storage_adaptors.py,sha256=Q2-G7oDqoIqlIBEmnUsKwSzM2lNGNUPKtTbMjTV9178,6218
|
11
|
-
scrapling/core/translator.py,sha256=WN_xPyYrD1MjLPv8Ar8zHNTPC_iYsW29kkjET4hbFI0,5228
|
12
|
-
scrapling/core/utils.py,sha256=RajDRSPkVmszjpwNy8NIz8ZlUxPox8j2rSractr7Q9s,3779
|
13
|
-
scrapling/engines/__init__.py,sha256=zA7tzqcDXP0hllwmjVewNHWipIA4JSU9mRG4J-cud0c,267
|
14
|
-
scrapling/engines/camo.py,sha256=fmpGMW5T7we5cQC8muyvVo_A27yAqc5csm7dO_2jHiE,8446
|
15
|
-
scrapling/engines/constants.py,sha256=WTn-X4kFIDWjXTiqOT0tm4XT5pijcdohFyZ0Af2C5Xc,3723
|
16
|
-
scrapling/engines/pw.py,sha256=kWbkHm2vnQYeGuJnicKlAL1HrBKuXoFtyRMNFXLs4VY,13962
|
17
|
-
scrapling/engines/static.py,sha256=h629IjT78YbhjFYBVSli53lKiYrG3929TAaZ7TA-j-Y,8022
|
18
|
-
scrapling/engines/toolbelt/__init__.py,sha256=0tSsxMH5ALOMPXrLkr8mTH7LWg9QfIse4Ij9vUFgYjY,391
|
19
|
-
scrapling/engines/toolbelt/custom.py,sha256=tab_wJmN6onvu2U8tDXeJ9jn6A47jTkmxSBoc-w8dIk,12789
|
20
|
-
scrapling/engines/toolbelt/fingerprints.py,sha256=Y3FW8uqxxeNK3v6vBVvki8VjeG5oRxSwim4Q2Hv_cRk,2917
|
21
|
-
scrapling/engines/toolbelt/navigation.py,sha256=Okpl4ynlLn2cUpSiaaoXDSOdDOXhvxNOOGphE_HXc5k,4016
|
22
|
-
scrapling/engines/toolbelt/bypasses/navigator_plugins.js,sha256=tbnnk3nCXB6QEQnOhDlu3n-s7lnUTAkrUsjP6FDQIQg,2104
|
23
|
-
scrapling/engines/toolbelt/bypasses/notification_permission.js,sha256=poPM3o5WYgEX-EdiUfDCllpWfc3Umvw4jr2u6O6elus,237
|
24
|
-
scrapling/engines/toolbelt/bypasses/pdf_viewer.js,sha256=mKjjSuP1-BOGC_2WhRYHJo_LP7lTBi2KXmP_zsHO_tI,173
|
25
|
-
scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js,sha256=3RP1AE_XZRvpupeV_i-WSNVqRxyUy0qd8rQV8j_4j3U,221
|
26
|
-
scrapling/engines/toolbelt/bypasses/screen_props.js,sha256=fZEuHMQ1-fYuxxUMoQXUvVWYUkPUbblkfMfpiLvBY7w,599
|
27
|
-
scrapling/engines/toolbelt/bypasses/webdriver_fully.js,sha256=hdJw4clRAJQqIdq5gIFC_eC-x7C1i2ab01KV5ylmOBs,728
|
28
|
-
scrapling/engines/toolbelt/bypasses/window_chrome.js,sha256=D7hqzNGGDorh8JVlvm2YIv7Bk2CoVkG55MDIdyqhT1w,6808
|
29
|
-
tests/__init__.py,sha256=YHFB5ftzgLQVh6gbPfbYcY4yOS9DOBp5dBa6I-qtm8U,32
|
30
|
-
tests/fetchers/__init__.py,sha256=6H4NgARhyTcGGd3dNCKQJ8kUFdrAEMSScQL7Ga_vU3c,43
|
31
|
-
tests/fetchers/test_camoufox.py,sha256=-1v_0mXeBcAVW932nkFws1HIDCodGbpNYniSnVMHeeU,3116
|
32
|
-
tests/fetchers/test_httpx.py,sha256=rrw9q4KdDAHpQVa4sTmw278Yv1OlwY_SKPbpBPLVN7c,3508
|
33
|
-
tests/fetchers/test_playwright.py,sha256=xwhRmlw7WBrtqyilZsoMHkHpyAx7iXQ-YexDMJURTao,3702
|
34
|
-
tests/fetchers/test_utils.py,sha256=FPPJkBrqgYxdGeWwapH8Vj8zyfYVLiTE1qSLu8eBWik,5728
|
35
|
-
tests/parser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
36
|
-
tests/parser/test_automatch.py,sha256=BeeYJi3cYCghbiZmi57z4bqcGPaoUA8GAm7MALBBkkk,2486
|
37
|
-
tests/parser/test_general.py,sha256=sPbwQRka9Mh8MDz2Sto8Rwg78t0SWWxELgzhTVPEplE,11785
|
38
|
-
scrapling-0.2.8.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
|
39
|
-
scrapling-0.2.8.dist-info/METADATA,sha256=0As--zWykpljObaw8DZQJr6udpHm4NyRN-dfUOUrhBc,66605
|
40
|
-
scrapling-0.2.8.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
41
|
-
scrapling-0.2.8.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
|
42
|
-
scrapling-0.2.8.dist-info/RECORD,,
|