selectolax 0.3.28__cp39-cp39-macosx_10_9_x86_64.whl → 0.3.30__cp39-cp39-macosx_10_9_x86_64.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

selectolax/__init__.py CHANGED
@@ -3,7 +3,7 @@
3
3
 
4
4
  __author__ = """Artem Golubin"""
5
5
  __email__ = 'me@rushter.com'
6
- __version__ = '0.3.28'
6
+ __version__ = '0.3.30'
7
7
 
8
8
  from . import parser
9
9
  from . import lexbor
@@ -6,16 +6,16 @@ _TAG_TO_NAME = {
6
6
  0x0004: "-comment",
7
7
  }
8
8
  ctypedef fused str_or_LexborNode:
9
- basestring
9
+ str
10
10
  bytes
11
11
  LexborNode
12
12
 
13
13
  cdef inline bytes to_bytes(str_or_LexborNode value):
14
14
  cdef bytes bytes_val
15
- if isinstance(value, (str, unicode)):
16
- bytes_val = value.encode(_ENCODING)
15
+ if isinstance(value, unicode):
16
+ bytes_val = <bytes>value.encode("utf-8")
17
17
  elif isinstance(value, bytes):
18
- bytes_val = <char*> value
18
+ bytes_val = <bytes>value
19
19
  return bytes_val
20
20
 
21
21
  @cython.final
@@ -416,9 +416,14 @@ cdef class LexborNode:
416
416
  node = node.next
417
417
 
418
418
 
419
- def unwrap(self):
419
+ def unwrap(self, delete_empty=False):
420
420
  """Replace node with whatever is inside this node.
421
421
 
422
+ Parameters
423
+ ----------
424
+ delete_empty : bool, default False
425
+ If True, removes empty tags.
426
+
422
427
  Examples
423
428
  --------
424
429
 
@@ -427,8 +432,11 @@ cdef class LexborNode:
427
432
  >>> tree.html
428
433
  '<html><head></head><body><div>Hello world!</div></body></html>'
429
434
 
435
+ Note: by default, empty tags are ignored, use "delete_empty" to change this.
430
436
  """
431
437
  if self.node.first_child == NULL:
438
+ if delete_empty:
439
+ lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
432
440
  return
433
441
  cdef lxb_dom_node_t* next_node;
434
442
  cdef lxb_dom_node_t* current_node;
@@ -445,7 +453,7 @@ cdef class LexborNode:
445
453
  lxb_dom_node_insert_before(self.node, self.node.first_child)
446
454
  lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
447
455
 
448
- def unwrap_tags(self, list tags):
456
+ def unwrap_tags(self, list tags, delete_empty = False):
449
457
  """Unwraps specified tags from the HTML tree.
450
458
 
451
459
  Works the same as the ``unwrap`` method, but applied to a list of tags.
@@ -454,6 +462,8 @@ cdef class LexborNode:
454
462
  ----------
455
463
  tags : list
456
464
  List of tags to remove.
465
+ delete_empty : bool, default False
466
+ If True, removes empty tags.
457
467
 
458
468
  Examples
459
469
  --------
@@ -462,11 +472,13 @@ cdef class LexborNode:
462
472
  >>> tree.body.unwrap_tags(['i','a'])
463
473
  >>> tree.body.html
464
474
  '<body><div>Hello world!</div></body>'
475
+
476
+ Note: by default, empty tags are ignored, use "delete_empty" to change this.
465
477
  """
466
478
 
467
479
  for tag in tags:
468
480
  for element in self.css(tag):
469
- element.unwrap()
481
+ element.unwrap(delete_empty)
470
482
 
471
483
 
472
484
  def traverse(self, include_text=False):
@@ -38,6 +38,9 @@ cdef class LexborCSSSelector:
38
38
  cdef lxb_char_t* c_selector
39
39
  cdef lxb_css_selector_list_t * selectors_list
40
40
 
41
+ if not isinstance(query, str):
42
+ raise TypeError("Query must be a string.")
43
+
41
44
  bytes_query = query.encode(_ENCODING)
42
45
  selectors_list = lxb_css_selectors_parse(self.parser, <lxb_char_t *> bytes_query, <size_t>len(query))
43
46
 
@@ -59,6 +62,9 @@ cdef class LexborCSSSelector:
59
62
  cdef lxb_char_t * c_selector
60
63
  cdef lxb_css_selector_list_t * selectors_list
61
64
 
65
+ if not isinstance(query, str):
66
+ raise TypeError("Query must be a string.")
67
+
62
68
  bytes_query = query.encode(_ENCODING)
63
69
  selectors_list = lxb_css_selectors_parse(self.parser, <lxb_char_t *> bytes_query, <size_t> len(query))
64
70