scrapling-0.2.8-py3-none-any.whl → scrapling-0.2.9-py3-none-any.whl

Files changed (36)
  1. scrapling/__init__.py +4 -4
  2. scrapling/core/custom_types.py +88 -6
  3. scrapling/core/storage_adaptors.py +5 -6
  4. scrapling/core/translator.py +2 -2
  5. scrapling/core/utils.py +29 -27
  6. scrapling/defaults.py +2 -1
  7. scrapling/engines/camo.py +89 -15
  8. scrapling/engines/constants.py +4 -4
  9. scrapling/engines/pw.py +158 -83
  10. scrapling/engines/static.py +91 -48
  11. scrapling/engines/toolbelt/__init__.py +3 -3
  12. scrapling/engines/toolbelt/custom.py +20 -22
  13. scrapling/engines/toolbelt/fingerprints.py +3 -3
  14. scrapling/engines/toolbelt/navigation.py +21 -8
  15. scrapling/fetchers.py +229 -14
  16. scrapling/parser.py +49 -21
  17. {scrapling-0.2.8.dist-info → scrapling-0.2.9.dist-info}/METADATA +32 -16
  18. scrapling-0.2.9.dist-info/RECORD +47 -0
  19. tests/fetchers/async/__init__.py +0 -0
  20. tests/fetchers/async/test_camoufox.py +95 -0
  21. tests/fetchers/async/test_httpx.py +83 -0
  22. tests/fetchers/async/test_playwright.py +99 -0
  23. tests/fetchers/sync/__init__.py +0 -0
  24. tests/fetchers/sync/test_camoufox.py +68 -0
  25. tests/fetchers/sync/test_httpx.py +82 -0
  26. tests/fetchers/sync/test_playwright.py +87 -0
  27. tests/fetchers/test_utils.py +90 -122
  28. tests/parser/test_automatch.py +64 -9
  29. tests/parser/test_general.py +260 -218
  30. scrapling-0.2.8.dist-info/RECORD +0 -42
  31. tests/fetchers/test_camoufox.py +0 -65
  32. tests/fetchers/test_httpx.py +0 -68
  33. tests/fetchers/test_playwright.py +0 -77
  34. {scrapling-0.2.8.dist-info → scrapling-0.2.9.dist-info}/LICENSE +0 -0
  35. {scrapling-0.2.8.dist-info → scrapling-0.2.9.dist-info}/WHEEL +0 -0
  36. {scrapling-0.2.8.dist-info → scrapling-0.2.9.dist-info}/top_level.txt +0 -0
tests/parser/test_general.py +260 -218
@@ -1,288 +1,330 @@
-
 import pickle
-import unittest
+import time

+import pytest
 from cssselect import SelectorError, SelectorSyntaxError

 from scrapling import Adaptor


-class TestParser(unittest.TestCase):
-    def setUp(self):
-        self.html = '''
-        <html>
-        <head>
-            <title>Complex Web Page</title>
-            <style>
-                .hidden { display: none; }
-            </style>
-        </head>
-        <body>
-            <header>
-                <nav>
-                    <ul>
-                        <li><a href="#home">Home</a></li>
-                        <li><a href="#about">About</a></li>
-                        <li><a href="#contact">Contact</a></li>
-                    </ul>
-                </nav>
-            </header>
-            <main>
-                <section id="products" schema='{"jsonable": "data"}'>
-                    <h2>Products</h2>
-                    <div class="product-list">
-                        <article class="product" data-id="1">
-                            <h3>Product 1</h3>
-                            <p class="description">This is product 1</p>
-                            <span class="price">$10.99</span>
-                            <div class="hidden stock">In stock: 5</div>
-                        </article>
-                        <article class="product" data-id="2">
-                            <h3>Product 2</h3>
-                            <p class="description">This is product 2</p>
-                            <span class="price">$20.99</span>
-                            <div class="hidden stock">In stock: 3</div>
-                        </article>
-                        <article class="product" data-id="3">
-                            <h3>Product 3</h3>
-                            <p class="description">This is product 3</p>
-                            <span class="price">$15.99</span>
-                            <div class="hidden stock">Out of stock</div>
-                        </article>
+@pytest.fixture
+def html_content():
+    return '''
+    <html>
+    <head>
+        <title>Complex Web Page</title>
+        <style>
+            .hidden { display: none; }
+        </style>
+    </head>
+    <body>
+        <header>
+            <nav>
+                <ul>
+                    <li><a href="#home">Home</a></li>
+                    <li><a href="#about">About</a></li>
+                    <li><a href="#contact">Contact</a></li>
+                </ul>
+            </nav>
+        </header>
+        <main>
+            <section id="products" schema='{"jsonable": "data"}'>
+                <h2>Products</h2>
+                <div class="product-list">
+                    <article class="product" data-id="1">
+                        <h3>Product 1</h3>
+                        <p class="description">This is product 1</p>
+                        <span class="price">$10.99</span>
+                        <div class="hidden stock">In stock: 5</div>
+                    </article>
+                    <article class="product" data-id="2">
+                        <h3>Product 2</h3>
+                        <p class="description">This is product 2</p>
+                        <span class="price">$20.99</span>
+                        <div class="hidden stock">In stock: 3</div>
+                    </article>
+                    <article class="product" data-id="3">
+                        <h3>Product 3</h3>
+                        <p class="description">This is product 3</p>
+                        <span class="price">$15.99</span>
+                        <div class="hidden stock">Out of stock</div>
+                    </article>
+                </div>
+            </section>
+            <section id="reviews">
+                <h2>Customer Reviews</h2>
+                <div class="review-list">
+                    <div class="review" data-rating="5">
+                        <p class="review-text">Great product!</p>
+                        <span class="reviewer">John Doe</span>
                     </div>
-                </section>
-                <section id="reviews">
-                    <h2>Customer Reviews</h2>
-                    <div class="review-list">
-                        <div class="review" data-rating="5">
-                            <p class="review-text">Great product!</p>
-                            <span class="reviewer">John Doe</span>
-                        </div>
-                        <div class="review" data-rating="4">
-                            <p class="review-text">Good value for money.</p>
-                            <span class="reviewer">Jane Smith</span>
-                        </div>
+                    <div class="review" data-rating="4">
+                        <p class="review-text">Good value for money.</p>
+                        <span class="reviewer">Jane Smith</span>
                     </div>
-                </section>
-            </main>
-            <footer>
-                <p>&copy; 2024 Our Company</p>
-            </footer>
-            <script id="page-data" type="application/json">
-                {"lastUpdated": "2024-09-22T10:30:00Z", "totalProducts": 3}
-            </script>
-        </body>
-        </html>
-        '''
-        self.page = Adaptor(self.html, auto_match=False, debug=False)
-
-    def test_css_selector(self):
-        """Test Selecting elements with complex CSS selectors"""
-        elements = self.page.css('main #products .product-list article.product')
-        self.assertEqual(len(elements), 3)
-
-        in_stock_products = self.page.css(
+                </div>
+            </section>
+        </main>
+        <footer>
+            <p>&copy; 2024 Our Company</p>
+        </footer>
+        <script id="page-data" type="application/json">
+            {"lastUpdated": "2024-09-22T10:30:00Z", "totalProducts": 3}
+        </script>
+    </body>
+    </html>
+    '''
+
+
+@pytest.fixture
+def page(html_content):
+    return Adaptor(html_content, auto_match=False)
+
+
+# CSS Selector Tests
+class TestCSSSelectors:
+    def test_basic_product_selection(self, page):
+        """Test selecting all product elements"""
+        elements = page.css('main #products .product-list article.product')
+        assert len(elements) == 3
+
+    def test_in_stock_product_selection(self, page):
+        """Test selecting in-stock products"""
+        in_stock_products = page.css(
             'main #products .product-list article.product:not(:contains("Out of stock"))')
-        self.assertEqual(len(in_stock_products), 2)
+        assert len(in_stock_products) == 2
+

-    def test_xpath_selector(self):
-        """Test Selecting elements with Complex XPath selectors"""
-        reviews = self.page.xpath(
+# XPath Selector Tests
+class TestXPathSelectors:
+    def test_high_rating_reviews(self, page):
+        """Test selecting reviews with high ratings"""
+        reviews = page.xpath(
             '//section[@id="reviews"]//div[contains(@class, "review") and @data-rating >= 4]'
         )
-        self.assertEqual(len(reviews), 2)
+        assert len(reviews) == 2

-        high_priced_products = self.page.xpath(
+    def test_high_priced_products(self, page):
+        """Test selecting products above a certain price"""
+        high_priced_products = page.xpath(
             '//article[contains(@class, "product")]'
             '[number(translate(substring-after(.//span[@class="price"], "$"), ",", "")) > 15]'
         )
-        self.assertEqual(len(high_priced_products), 2)
+        assert len(high_priced_products) == 2
+
+
+# Text Matching Tests
+class TestTextMatching:
+    def test_regex_multiple_matches(self, page):
+        """Test finding multiple matches with regex"""
+        stock_info = page.find_by_regex(r'In stock: \d+', first_match=False)
+        assert len(stock_info) == 2

-    def test_find_by_text(self):
-        """Test Selecting elements with Text matching"""
-        stock_info = self.page.find_by_regex(r'In stock: \d+', first_match=False)
-        self.assertEqual(len(stock_info), 2)
+    def test_regex_first_match(self, page):
+        """Test finding the first match with regex"""
+        stock_info = page.find_by_regex(r'In stock: \d+', first_match=True, case_sensitive=True)
+        assert stock_info.text == 'In stock: 5'

-        stock_info = self.page.find_by_regex(r'In stock: \d+', first_match=True, case_sensitive=True)
-        self.assertEqual(stock_info.text, 'In stock: 5')
+    def test_partial_text_match(self, page):
+        """Test finding elements with partial text match"""
+        stock_info = page.find_by_text(r'In stock:', partial=True, first_match=False)
+        assert len(stock_info) == 2

-        stock_info = self.page.find_by_text(r'In stock:', partial=True, first_match=False)
-        self.assertEqual(len(stock_info), 2)
+    def test_exact_text_match(self, page):
+        """Test finding elements with exact text match"""
+        out_of_stock = page.find_by_text('Out of stock', partial=False, first_match=False)
+        assert len(out_of_stock) == 1

-        out_of_stock = self.page.find_by_text('Out of stock', partial=False, first_match=False)
-        self.assertEqual(len(out_of_stock), 1)

-    def test_find_similar_elements(self):
-        """Test Finding similar elements of an element"""
-        first_product = self.page.css_first('.product')
+# Similar Elements Tests
+class TestSimilarElements:
+    def test_finding_similar_products(self, page):
+        """Test finding similar product elements"""
+        first_product = page.css_first('.product')
         similar_products = first_product.find_similar()
-        self.assertEqual(len(similar_products), 2)
+        assert len(similar_products) == 2

-        first_review = self.page.find('div', class_='review')
+    def test_finding_similar_reviews(self, page):
+        """Test finding similar review elements with additional filtering"""
+        first_review = page.find('div', class_='review')
         similar_high_rated_reviews = [
             review
             for review in first_review.find_similar()
             if int(review.attrib.get('data-rating', 0)) >= 4
         ]
-        self.assertEqual(len(similar_high_rated_reviews), 1)
+        assert len(similar_high_rated_reviews) == 1

-    def test_expected_errors(self):
-        """Test errors that should raised if it does"""
-        with self.assertRaises(ValueError):
+
+# Error Handling Tests
+class TestErrorHandling:
+    def test_invalid_adaptor_initialization(self):
+        """Test various invalid Adaptor initializations"""
+        # No arguments
+        with pytest.raises(ValueError):
             _ = Adaptor(auto_match=False)

-        with self.assertRaises(TypeError):
+        # Invalid argument types
+        with pytest.raises(TypeError):
             _ = Adaptor(root="ayo", auto_match=False)

-        with self.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             _ = Adaptor(text=1, auto_match=False)

-        with self.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             _ = Adaptor(body=1, auto_match=False)

-        with self.assertRaises(ValueError):
-            _ = Adaptor(self.html, storage=object, auto_match=True)
-
-    def test_pickleable(self):
-        """Test that objects aren't pickleable"""
-        table = self.page.css('.product-list')[0]
-        with self.assertRaises(TypeError):  # Adaptors
-            pickle.dumps(table)
-
-        with self.assertRaises(TypeError):  # Adaptor
-            pickle.dumps(table[0])
-
-    def test_overridden(self):
-        """Test overridden functions"""
-        table = self.page.css('.product-list')[0]
-        self.assertTrue(issubclass(type(table.__str__()), str))
-        self.assertTrue(issubclass(type(table.__repr__()), str))
-        self.assertTrue(issubclass(type(table.attrib.__str__()), str))
-        self.assertTrue(issubclass(type(table.attrib.__repr__()), str))
-
-    def test_bad_selector(self):
-        """Test object can handle bad selector"""
-        with self.assertRaises((SelectorError, SelectorSyntaxError,)):
-            self.page.css('4 ayo')
+    def test_invalid_storage(self, page, html_content):
+        """Test invalid storage parameter"""
+        with pytest.raises(ValueError):
+            _ = Adaptor(html_content, storage=object, auto_match=True)

-        with self.assertRaises((SelectorError, SelectorSyntaxError,)):
-            self.page.xpath('4 ayo')
+    def test_bad_selectors(self, page):
+        """Test handling of invalid selectors"""
+        with pytest.raises((SelectorError, SelectorSyntaxError)):
+            page.css('4 ayo')

-    def test_selectors_generation(self):
-        """Try to create selectors for all elements in the page"""
-        def _traverse(element: Adaptor):
-            self.assertTrue(type(element.generate_css_selector) is str)
-            self.assertTrue(type(element.generate_xpath_selector) is str)
-            for branch in element.children:
-                _traverse(branch)
+        with pytest.raises((SelectorError, SelectorSyntaxError)):
+            page.xpath('4 ayo')

-        _traverse(self.page)

-    def test_getting_all_text(self):
-        """Test getting all text"""
-        self.assertNotEqual(self.page.get_all_text(), '')
-
-    def test_element_navigation(self):
-        """Test moving in the page from selected element"""
-        table = self.page.css('.product-list')[0]
+# Pickling and Object Representation Tests
+class TestPicklingAndRepresentation:
+    def test_unpickleable_objects(self, page):
+        """Test that Adaptor objects cannot be pickled"""
+        table = page.css('.product-list')[0]
+        with pytest.raises(TypeError):
+            pickle.dumps(table)

-        self.assertIsNot(table.path, [])
-        self.assertNotEqual(table.html_content, '')
-        self.assertNotEqual(table.prettify(), '')
+        with pytest.raises(TypeError):
+            pickle.dumps(table[0])

+    def test_string_representations(self, page):
+        """Test custom string representations of objects"""
+        table = page.css('.product-list')[0]
+        assert issubclass(type(table.__str__()), str)
+        assert issubclass(type(table.__repr__()), str)
+        assert issubclass(type(table.attrib.__str__()), str)
+        assert issubclass(type(table.attrib.__repr__()), str)
+
+
+# Navigation and Traversal Tests
+class TestElementNavigation:
+    def test_basic_navigation_properties(self, page):
+        """Test basic navigation properties of elements"""
+        table = page.css('.product-list')[0]
+        assert table.path is not None
+        assert table.html_content != ''
+        assert table.prettify() != ''
+
+    def test_parent_and_sibling_navigation(self, page):
+        """Test parent and sibling navigation"""
+        table = page.css('.product-list')[0]
         parent = table.parent
-        self.assertEqual(parent.attrib['id'], 'products')
-
-        children = table.children
-        self.assertEqual(len(children), 3)
+        assert parent.attrib['id'] == 'products'

         parent_siblings = parent.siblings
-        self.assertEqual(len(parent_siblings), 1)
+        assert len(parent_siblings) == 1
+
+    def test_child_navigation(self, page):
+        """Test child navigation"""
+        table = page.css('.product-list')[0]
+        children = table.children
+        assert len(children) == 3

-        child = table.find({'data-id': "1"})
+    def test_next_and_previous_navigation(self, page):
+        """Test next and previous element navigation"""
+        child = page.css('.product-list')[0].find({'data-id': "1"})
         next_element = child.next
-        self.assertEqual(next_element.attrib['data-id'], '2')
+        assert next_element.attrib['data-id'] == '2'

         prev_element = next_element.previous
-        self.assertEqual(prev_element.tag, child.tag)
+        assert prev_element.tag == child.tag

-        all_prices = self.page.css('.price')
+    def test_ancestor_finding(self, page):
+        """Test finding ancestors of elements"""
+        all_prices = page.css('.price')
         products_with_prices = [
             price.find_ancestor(lambda p: p.has_class('product'))
             for price in all_prices
         ]
-        self.assertEqual(len(products_with_prices), 3)
-
-    def test_empty_return(self):
-        """Test cases where functions shouldn't have results"""
-        test_html = """
-        <html>
-            <span id="a"><a></a><!--comment--></span>
-            <span id="b"><!--comment--><a></a></span>
-        </html>"""
-        soup = Adaptor(test_html, auto_match=False, keep_comments=False)
-        html_tag = soup.css('html')[0]
-        self.assertEqual(html_tag.path, [])
-        self.assertEqual(html_tag.siblings, [])
-        self.assertEqual(html_tag.parent, None)
-        self.assertEqual(html_tag.find_ancestor(lambda e: e), None)
-
-        self.assertEqual(soup.css('#a a')[0].next, None)
-        self.assertEqual(soup.css('#b a')[0].previous, None)
-
-    def test_text_to_json(self):
-        """Test converting text to json"""
-        script_content = self.page.css('#page-data::text')[0]
-        self.assertTrue(issubclass(type(script_content.sort()), str))
+        assert len(products_with_prices) == 3
+
+
+# JSON and Attribute Tests
+class TestJSONAndAttributes:
+    def test_json_conversion(self, page):
+        """Test converting content to JSON"""
+        script_content = page.css('#page-data::text')[0]
+        assert issubclass(type(script_content.sort()), str)
         page_data = script_content.json()
-        self.assertEqual(page_data['totalProducts'], 3)
-        self.assertTrue('lastUpdated' in page_data)
-
-    def test_regex_on_text(self):
-        """Test doing regex on a selected text"""
-        element = self.page.css('[data-id="1"] .price')[0]
-        match = element.re_first(r'[\.\d]+')
-        self.assertEqual(match, '10.99')
-        match = element.text.re(r'(\d+)', replace_entities=False)
-        self.assertEqual(len(match), 2)
-
-    def test_attribute_operations(self):
-        """Test operations on elements attributes"""
-        products = self.page.css('.product')
+        assert page_data['totalProducts'] == 3
+        assert 'lastUpdated' in page_data
+
+    def test_attribute_operations(self, page):
+        """Test various attribute-related operations"""
+        # Product ID extraction
+        products = page.css('.product')
         product_ids = [product.attrib['data-id'] for product in products]
-        self.assertEqual(product_ids, ['1', '2', '3'])
-        self.assertTrue('data-id' in products[0].attrib)
+        assert product_ids == ['1', '2', '3']
+        assert 'data-id' in products[0].attrib

-        reviews = self.page.css('.review')
+        # Review rating calculations
+        reviews = page.css('.review')
         review_ratings = [int(review.attrib['data-rating']) for review in reviews]
-        self.assertEqual(sum(review_ratings) / len(review_ratings), 4.5)
+        assert sum(review_ratings) / len(review_ratings) == 4.5

+        # Attribute searching
         key_value = list(products[0].attrib.search_values('1', partial=False))
-        self.assertEqual(list(key_value[0].keys()), ['data-id'])
+        assert list(key_value[0].keys()) == ['data-id']

         key_value = list(products[0].attrib.search_values('1', partial=True))
-        self.assertEqual(list(key_value[0].keys()), ['data-id'])
+        assert list(key_value[0].keys()) == ['data-id']
+
+        # JSON attribute conversion
+        attr_json = page.css_first('#products').attrib['schema'].json()
+        assert attr_json == {'jsonable': 'data'}
+        assert isinstance(page.css('#products')[0].attrib.json_string, bytes)
+
+
+# Performance Test
+def test_large_html_parsing_performance():
+    """Test parsing and selecting performance on large HTML"""
+    large_html = '<html><body>' + '<div class="item">' * 5000 + '</div>' * 5000 + '</body></html>'
+
+    start_time = time.time()
+    parsed = Adaptor(large_html, auto_match=False)
+    elements = parsed.css('.item')
+    end_time = time.time()
+
+    assert len(elements) == 5000
+    # Converting 5000 elements to a class and doing operations on them will take time
+    # Based on my tests with 100 runs, 1 loop each Scrapling (given the extra work/features) takes 10.4ms on average
+    assert end_time - start_time < 0.5  # Locally I test on 0.1 but on GitHub actions with browsers and threading sometimes closing adds fractions of seconds
+
+
+# Selector Generation Test
+def test_selectors_generation(page):
+    """Try to create selectors for all elements in the page"""

-        attr_json = self.page.css_first('#products').attrib['schema'].json()
-        self.assertEqual(attr_json, {'jsonable': 'data'})
-        self.assertEqual(type(self.page.css('#products')[0].attrib.json_string), bytes)
+    def _traverse(element: Adaptor):
+        assert isinstance(element.generate_css_selector, str)
+        assert isinstance(element.generate_xpath_selector, str)
+        for branch in element.children:
+            _traverse(branch)

-    def test_performance(self):
-        """Test parsing and selecting speed"""
-        import time
-        large_html = '<html><body>' + '<div class="item">' * 5000 + '</div>' * 5000 + '</body></html>'
+    _traverse(page)

-        start_time = time.time()
-        parsed = Adaptor(large_html, auto_match=False, debug=False)
-        elements = parsed.css('.item')
-        end_time = time.time()

-        self.assertEqual(len(elements), 5000)
-        # Converting 5000 elements to a class and doing operations on them will take time
-        # Based on my tests with 100 runs, 1 loop each Scrapling (given the extra work/features) takes 10.4ms on average
-        self.assertLess(end_time - start_time, 0.5)  # Locally I test on 0.1 but on GitHub actions with browsers and threading sometimes closing adds fractions of seconds
+# Miscellaneous Tests
+def test_getting_all_text(page):
+    """Test getting all text from the page"""
+    assert page.get_all_text() != ''


-# Use `coverage run -m unittest --verbose tests/test_parser_functions.py` instead for the coverage report
-# if __name__ == '__main__':
-#     unittest.main(verbosity=2)
+def test_regex_on_text(page):
+    """Test regex operations on text"""
+    element = page.css('[data-id="1"] .price')[0]
+    match = element.re_first(r'[\.\d]+')
+    assert match == '10.99'
+    match = element.text.re(r'(\d+)', replace_entities=False)
+    assert len(match) == 2
scrapling-0.2.8.dist-info/RECORD +0 -42
@@ -1,42 +0,0 @@
-scrapling/__init__.py,sha256=0-gw4uqckCs7ikl6sHiB5c6y0AelpgefqJkBmSd7j1k,469
-scrapling/defaults.py,sha256=qO6zAS7k5_QXvbjuoBv87fUMqASGMuM2dVry9J9auv0,287
-scrapling/fetchers.py,sha256=iw1wEuFg14akJYpSg9webfBjAL341Pnofn4IkWahGlE,17486
-scrapling/parser.py,sha256=suXggr39GimLnnLm9ivM1CQ40AoDwGke2sgnWszqFqk,54331
-scrapling/py.typed,sha256=frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN_XKdLCPjaYaY,2
-scrapling/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-scrapling/core/_types.py,sha256=__HJ2JTk5vx5eg_7HAJmDjaHrMDIaoxNG8fadLLyKV8,566
-scrapling/core/custom_types.py,sha256=8GCgcZL-IT5lP6titxL-RPCiItQSuJZjSlFIGCDxoSs,8402
-scrapling/core/mixins.py,sha256=sozbpaGL1_O_x3U-ABM5aYWpnxpCLfdbcA9SG3P7weY,3532
-scrapling/core/storage_adaptors.py,sha256=Q2-G7oDqoIqlIBEmnUsKwSzM2lNGNUPKtTbMjTV9178,6218
-scrapling/core/translator.py,sha256=WN_xPyYrD1MjLPv8Ar8zHNTPC_iYsW29kkjET4hbFI0,5228
-scrapling/core/utils.py,sha256=RajDRSPkVmszjpwNy8NIz8ZlUxPox8j2rSractr7Q9s,3779
-scrapling/engines/__init__.py,sha256=zA7tzqcDXP0hllwmjVewNHWipIA4JSU9mRG4J-cud0c,267
-scrapling/engines/camo.py,sha256=fmpGMW5T7we5cQC8muyvVo_A27yAqc5csm7dO_2jHiE,8446
-scrapling/engines/constants.py,sha256=WTn-X4kFIDWjXTiqOT0tm4XT5pijcdohFyZ0Af2C5Xc,3723
-scrapling/engines/pw.py,sha256=kWbkHm2vnQYeGuJnicKlAL1HrBKuXoFtyRMNFXLs4VY,13962
-scrapling/engines/static.py,sha256=h629IjT78YbhjFYBVSli53lKiYrG3929TAaZ7TA-j-Y,8022
-scrapling/engines/toolbelt/__init__.py,sha256=0tSsxMH5ALOMPXrLkr8mTH7LWg9QfIse4Ij9vUFgYjY,391
-scrapling/engines/toolbelt/custom.py,sha256=tab_wJmN6onvu2U8tDXeJ9jn6A47jTkmxSBoc-w8dIk,12789
-scrapling/engines/toolbelt/fingerprints.py,sha256=Y3FW8uqxxeNK3v6vBVvki8VjeG5oRxSwim4Q2Hv_cRk,2917
-scrapling/engines/toolbelt/navigation.py,sha256=Okpl4ynlLn2cUpSiaaoXDSOdDOXhvxNOOGphE_HXc5k,4016
-scrapling/engines/toolbelt/bypasses/navigator_plugins.js,sha256=tbnnk3nCXB6QEQnOhDlu3n-s7lnUTAkrUsjP6FDQIQg,2104
-scrapling/engines/toolbelt/bypasses/notification_permission.js,sha256=poPM3o5WYgEX-EdiUfDCllpWfc3Umvw4jr2u6O6elus,237
-scrapling/engines/toolbelt/bypasses/pdf_viewer.js,sha256=mKjjSuP1-BOGC_2WhRYHJo_LP7lTBi2KXmP_zsHO_tI,173
-scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js,sha256=3RP1AE_XZRvpupeV_i-WSNVqRxyUy0qd8rQV8j_4j3U,221
-scrapling/engines/toolbelt/bypasses/screen_props.js,sha256=fZEuHMQ1-fYuxxUMoQXUvVWYUkPUbblkfMfpiLvBY7w,599
-scrapling/engines/toolbelt/bypasses/webdriver_fully.js,sha256=hdJw4clRAJQqIdq5gIFC_eC-x7C1i2ab01KV5ylmOBs,728
-scrapling/engines/toolbelt/bypasses/window_chrome.js,sha256=D7hqzNGGDorh8JVlvm2YIv7Bk2CoVkG55MDIdyqhT1w,6808
-tests/__init__.py,sha256=YHFB5ftzgLQVh6gbPfbYcY4yOS9DOBp5dBa6I-qtm8U,32
-tests/fetchers/__init__.py,sha256=6H4NgARhyTcGGd3dNCKQJ8kUFdrAEMSScQL7Ga_vU3c,43
-tests/fetchers/test_camoufox.py,sha256=-1v_0mXeBcAVW932nkFws1HIDCodGbpNYniSnVMHeeU,3116
-tests/fetchers/test_httpx.py,sha256=rrw9q4KdDAHpQVa4sTmw278Yv1OlwY_SKPbpBPLVN7c,3508
-tests/fetchers/test_playwright.py,sha256=xwhRmlw7WBrtqyilZsoMHkHpyAx7iXQ-YexDMJURTao,3702
-tests/fetchers/test_utils.py,sha256=FPPJkBrqgYxdGeWwapH8Vj8zyfYVLiTE1qSLu8eBWik,5728
-tests/parser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tests/parser/test_automatch.py,sha256=BeeYJi3cYCghbiZmi57z4bqcGPaoUA8GAm7MALBBkkk,2486
-tests/parser/test_general.py,sha256=sPbwQRka9Mh8MDz2Sto8Rwg78t0SWWxELgzhTVPEplE,11785
-scrapling-0.2.8.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
-scrapling-0.2.8.dist-info/METADATA,sha256=0As--zWykpljObaw8DZQJr6udpHm4NyRN-dfUOUrhBc,66605
-scrapling-0.2.8.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-scrapling-0.2.8.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
-scrapling-0.2.8.dist-info/RECORD,,