selectolax 0.3.27__cp311-cp311-musllinux_1_2_x86_64.whl → 0.3.29__cp311-cp311-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

selectolax/__init__.py CHANGED
@@ -3,7 +3,7 @@
3
3
 
4
4
  __author__ = """Artem Golubin"""
5
5
  __email__ = 'me@rushter.com'
6
- __version__ = '0.3.27'
6
+ __version__ = '0.3.29'
7
7
 
8
8
  from . import parser
9
9
  from . import lexbor
@@ -19,8 +19,9 @@ cdef class LexborAttributes:
19
19
 
20
20
  while attr != NULL:
21
21
  key = lxb_dom_attr_local_name_noi(attr, &str_len)
22
+ if key is not NULL:
23
+ yield key.decode(_ENCODING)
22
24
  attr = attr.next
23
- yield key.decode(_ENCODING)
24
25
 
25
26
  def __setitem__(self, str key, value):
26
27
  value = str(value)
@@ -95,7 +95,7 @@ cdef class LexborNode:
95
95
  text : str
96
96
  """
97
97
  cdef lexbor_str_t *lxb_str
98
- cdef lxb_status_t lxb_status_t
98
+ cdef lxb_status_t status
99
99
 
100
100
  lxb_str = lexbor_str_create()
101
101
  status = lxb_html_serialize_tree_str(self.node, lxb_str)
@@ -416,9 +416,14 @@ cdef class LexborNode:
416
416
  node = node.next
417
417
 
418
418
 
419
- def unwrap(self):
419
+ def unwrap(self, delete_empty=False):
420
420
  """Replace node with whatever is inside this node.
421
421
 
422
+ Parameters
423
+ ----------
424
+ delete_empty : bool, default False
425
+ If True, removes empty tags.
426
+
422
427
  Examples
423
428
  --------
424
429
 
@@ -426,9 +431,12 @@ cdef class LexborNode:
426
431
  >>> tree.css_first('i').unwrap()
427
432
  >>> tree.html
428
433
  '<html><head></head><body><div>Hello world!</div></body></html>'
429
-
434
+
435
+ Note: by default, empty tags are ignored, use "delete_empty" to change this.
430
436
  """
431
437
  if self.node.first_child == NULL:
438
+ if delete_empty:
439
+ lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
432
440
  return
433
441
  cdef lxb_dom_node_t* next_node;
434
442
  cdef lxb_dom_node_t* current_node;
@@ -445,7 +453,7 @@ cdef class LexborNode:
445
453
  lxb_dom_node_insert_before(self.node, self.node.first_child)
446
454
  lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
447
455
 
448
- def unwrap_tags(self, list tags):
456
+ def unwrap_tags(self, list tags, delete_empty = False):
449
457
  """Unwraps specified tags from the HTML tree.
450
458
 
451
459
  Works the same as the ``unwrap`` method, but applied to a list of tags.
@@ -454,6 +462,8 @@ cdef class LexborNode:
454
462
  ----------
455
463
  tags : list
456
464
  List of tags to remove.
465
+ delete_empty : bool, default False
466
+ If True, removes empty tags.
457
467
 
458
468
  Examples
459
469
  --------
@@ -462,11 +472,13 @@ cdef class LexborNode:
462
472
  >>> tree.body.unwrap_tags(['i','a'])
463
473
  >>> tree.body.html
464
474
  '<body><div>Hello world!</div></body>'
475
+
476
+ Note: by default, empty tags are ignored, use "delete_empty" to change this.
465
477
  """
466
478
 
467
479
  for tag in tags:
468
480
  for element in self.css(tag):
469
- element.unwrap()
481
+ element.unwrap(delete_empty)
470
482
 
471
483
 
472
484
  def traverse(self, include_text=False):
@@ -77,10 +77,12 @@ cdef class LexborCSSSelector:
77
77
 
78
78
 
79
79
  def __dealloc__(self):
80
- lxb_selectors_destroy(self.selectors, True)
81
- # lxb_css_memory_destroy(self.parser.memory, True)
82
- lxb_css_parser_destroy(self.parser, True)
83
- lxb_css_selectors_destroy(self.css_selectors, True)
80
+ if self.selectors != NULL:
81
+ lxb_selectors_destroy(self.selectors, True)
82
+ if self.parser != NULL:
83
+ lxb_css_parser_destroy(self.parser, True)
84
+ if self.css_selectors != NULL:
85
+ lxb_css_selectors_destroy(self.css_selectors, True)
84
86
 
85
87
 
86
88
 
@@ -104,16 +106,16 @@ cdef class LexborSelector:
104
106
  raise SelectolaxError("This features is not supported by the lexbor backend. Please use Modest backend.")
105
107
 
106
108
  @property
107
- def matches(self):
109
+ def matches(self) -> list:
108
110
  """Returns all possible matches"""
109
111
  return self.nodes
110
112
 
111
113
  @property
112
- def any_matches(self):
114
+ def any_matches(self) -> bool:
113
115
  """Returns True if there are any matches"""
114
116
  return bool(self.nodes)
115
117
 
116
- def text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
118
+ def text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> LexborSelector:
117
119
  """Filter all current matches given text."""
118
120
  nodes = []
119
121
  for node in self.nodes:
@@ -123,7 +125,7 @@ cdef class LexborSelector:
123
125
  self.nodes = nodes
124
126
  return self
125
127
 
126
- def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False):
128
+ def any_text_contains(self, str text, bool deep=True, str separator='', bool strip=False) -> bool:
127
129
  """Returns True if any node in the current search scope contains specified text"""
128
130
  nodes = []
129
131
  for node in self.nodes:
@@ -132,7 +134,7 @@ cdef class LexborSelector:
132
134
  return True
133
135
  return False
134
136
 
135
- def attribute_longer_than(self, str attribute, int length, str start = None):
137
+ def attribute_longer_than(self, str attribute, int length, str start = None) -> LexborSelector:
136
138
  """Filter all current matches by attribute length.
137
139
 
138
140
  Similar to `string-length` in XPath.
@@ -147,7 +149,7 @@ cdef class LexborSelector:
147
149
  self.nodes = nodes
148
150
  return self
149
151
 
150
- def any_attribute_longer_than(self, str attribute, int length, str start = None):
152
+ def any_attribute_longer_than(self, str attribute, int length, str start = None) -> bool:
151
153
  """Returns True any href attribute longer than a specified length.
152
154
 
153
155
  Similar to `string-length` in XPath.