selectolax 0.3.29__cp39-cp39-macosx_10_9_x86_64.whl → 0.3.31__cp39-cp39-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

selectolax/__init__.py CHANGED
@@ -3,7 +3,7 @@
3
3
 
4
4
  __author__ = """Artem Golubin"""
5
5
  __email__ = 'me@rushter.com'
6
- __version__ = '0.3.29'
6
+ __version__ = '0.3.31'
7
7
 
8
8
  from . import parser
9
9
  from . import lexbor
@@ -1,5 +1,7 @@
1
1
  cimport cython
2
2
 
3
+ from typing import Optional
4
+
3
5
  @cython.final
4
6
  cdef class LexborAttributes:
5
7
  """A dict-like object that represents attributes."""
@@ -23,16 +25,32 @@ cdef class LexborAttributes:
23
25
  yield key.decode(_ENCODING)
24
26
  attr = attr.next
25
27
 
26
- def __setitem__(self, str key, value):
27
- value = str(value)
28
+ def __setitem__(self, str key, object value):
29
+ value = value
28
30
  bytes_key = key.encode(_ENCODING)
29
- bytes_value = value.encode(_ENCODING)
30
-
31
- lxb_dom_element_set_attribute(
32
- <lxb_dom_element_t *> self.node,
33
- <lxb_char_t *> bytes_key, len(bytes_key),
34
- <lxb_char_t *> bytes_value, len(bytes_value),
35
- )
31
+ bytes_value = value.encode(_ENCODING) if value else b""
32
+ cdef lxb_dom_attr_t *attr
33
+ cdef lxb_dom_document_t *doc
34
+
35
+ if value is None:
36
+ # N.B. This is suboptimal, but there is no API to set empty attributes
37
+ attr = lxb_dom_element_set_attribute(
38
+ <lxb_dom_element_t *> self.node,
39
+ <lxb_char_t *> bytes_key, len(bytes_key),
40
+ NULL, 0
41
+ )
42
+ doc = (<lxb_dom_node_t*>attr).owner_document
43
+ lexbor_str_destroy(attr.value, doc.text, 0)
44
+ attr.value = NULL
45
+
46
+ elif isinstance(value, str) or isinstance(value, unicode) :
47
+ lxb_dom_element_set_attribute(
48
+ <lxb_dom_element_t *> self.node,
49
+ <lxb_char_t *> bytes_key, len(bytes_key),
50
+ <lxb_char_t *> bytes_value, len(bytes_value),
51
+ )
52
+ else:
53
+ raise TypeError("Expected str or unicode, got %s" % type(value))
36
54
 
37
55
  def __delitem__(self, key):
38
56
  try:
@@ -6,16 +6,16 @@ _TAG_TO_NAME = {
6
6
  0x0004: "-comment",
7
7
  }
8
8
  ctypedef fused str_or_LexborNode:
9
- basestring
9
+ str
10
10
  bytes
11
11
  LexborNode
12
12
 
13
13
  cdef inline bytes to_bytes(str_or_LexborNode value):
14
14
  cdef bytes bytes_val
15
- if isinstance(value, (str, unicode)):
16
- bytes_val = value.encode(_ENCODING)
15
+ if isinstance(value, unicode):
16
+ bytes_val = <bytes>value.encode("utf-8")
17
17
  elif isinstance(value, bytes):
18
- bytes_val = <char*> value
18
+ bytes_val = <bytes>value
19
19
  return bytes_val
20
20
 
21
21
  @cython.final
@@ -273,6 +273,9 @@ cdef class LexborNode:
273
273
  >>> tag.decompose()
274
274
 
275
275
  """
276
+ if self.node == <lxb_dom_node_t *> lxb_dom_document_root(&self.parser.document.dom_document):
277
+ raise SelectolaxError("Decomposing the root node is not allowed.")
278
+
276
279
  if recursive:
277
280
  lxb_dom_node_destroy_deep(<lxb_dom_node_t *> self.node)
278
281
  else:
@@ -431,7 +434,7 @@ cdef class LexborNode:
431
434
  >>> tree.css_first('i').unwrap()
432
435
  >>> tree.html
433
436
  '<html><head></head><body><div>Hello world!</div></body></html>'
434
-
437
+
435
438
  Note: by default, empty tags are ignored, use "delete_empty" to change this.
436
439
  """
437
440
  if self.node.first_child == NULL:
@@ -472,7 +475,7 @@ cdef class LexborNode:
472
475
  >>> tree.body.unwrap_tags(['i','a'])
473
476
  >>> tree.body.html
474
477
  '<body><div>Hello world!</div></body>'
475
-
478
+
476
479
  Note: by default, empty tags are ignored, use "delete_empty" to change this.
477
480
  """
478
481
 
@@ -38,6 +38,9 @@ cdef class LexborCSSSelector:
38
38
  cdef lxb_char_t* c_selector
39
39
  cdef lxb_css_selector_list_t * selectors_list
40
40
 
41
+ if not isinstance(query, str):
42
+ raise TypeError("Query must be a string.")
43
+
41
44
  bytes_query = query.encode(_ENCODING)
42
45
  selectors_list = lxb_css_selectors_parse(self.parser, <lxb_char_t *> bytes_query, <size_t>len(query))
43
46
 
@@ -59,6 +62,9 @@ cdef class LexborCSSSelector:
59
62
  cdef lxb_char_t * c_selector
60
63
  cdef lxb_css_selector_list_t * selectors_list
61
64
 
65
+ if not isinstance(query, str):
66
+ raise TypeError("Query must be a string.")
67
+
62
68
  bytes_query = query.encode(_ENCODING)
63
69
  selectors_list = lxb_css_selectors_parse(self.parser, <lxb_char_t *> bytes_query, <size_t> len(query))
64
70
 
@@ -69,6 +75,7 @@ cdef class LexborCSSSelector:
69
75
  status = lxb_selectors_find(self.selectors, node.node, selectors_list,
70
76
  <lxb_selectors_cb_f> css_matcher_callback, <void *> self)
71
77
  if status != LXB_STATUS_OK:
78
+ lxb_css_selector_list_destroy_memory(selectors_list)
72
79
  raise SelectolaxError("Can't parse CSS selector.")
73
80
  result = bool(self.results)
74
81
  self.results = []