selectolax 0.3.29__cp311-cp311-musllinux_1_2_aarch64.whl → 0.4.0__cp311-cp311-musllinux_1_2_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

selectolax/__init__.py CHANGED
@@ -2,9 +2,7 @@
2
2
 
3
3
 
4
4
  __author__ = """Artem Golubin"""
5
- __email__ = 'me@rushter.com'
6
- __version__ = '0.3.29'
5
+ __email__ = "me@rushter.com"
6
+ __version__ = "0.4.0"
7
7
 
8
- from . import parser
9
- from . import lexbor
10
- from . import modest
8
+ from . import lexbor, modest, parser
@@ -1,5 +1,6 @@
1
1
  cimport cython
2
2
 
3
+
3
4
  @cython.final
4
5
  cdef class LexborAttributes:
5
6
  """A dict-like object that represents attributes."""
@@ -23,16 +24,32 @@ cdef class LexborAttributes:
23
24
  yield key.decode(_ENCODING)
24
25
  attr = attr.next
25
26
 
26
- def __setitem__(self, str key, value):
27
- value = str(value)
27
+ def __setitem__(self, str key, object value):
28
+ value = value
28
29
  bytes_key = key.encode(_ENCODING)
29
- bytes_value = value.encode(_ENCODING)
30
-
31
- lxb_dom_element_set_attribute(
32
- <lxb_dom_element_t *> self.node,
33
- <lxb_char_t *> bytes_key, len(bytes_key),
34
- <lxb_char_t *> bytes_value, len(bytes_value),
35
- )
30
+ bytes_value = value.encode(_ENCODING) if value else b""
31
+ cdef lxb_dom_attr_t *attr
32
+ cdef lxb_dom_document_t *doc
33
+
34
+ if value is None:
35
+ # N.B. This is suboptimal, but there is no API to set empty attributes
36
+ attr = lxb_dom_element_set_attribute(
37
+ <lxb_dom_element_t *> self.node,
38
+ <lxb_char_t *> bytes_key, len(bytes_key),
39
+ NULL, 0
40
+ )
41
+ doc = (<lxb_dom_node_t*>attr).owner_document
42
+ lexbor_str_destroy(attr.value, doc.text, 0)
43
+ attr.value = NULL
44
+
45
+ elif isinstance(value, str) or isinstance(value, unicode) :
46
+ lxb_dom_element_set_attribute(
47
+ <lxb_dom_element_t *> self.node,
48
+ <lxb_char_t *> bytes_key, len(bytes_key),
49
+ <lxb_char_t *> bytes_value, len(bytes_value),
50
+ )
51
+ else:
52
+ raise TypeError("Expected str or unicode, got %s" % type(value))
36
53
 
37
54
  def __delitem__(self, key):
38
55
  try:
@@ -1,31 +1,43 @@
1
1
  cimport cython
2
+ from cpython.exc cimport PyErr_SetNone
3
+
4
+ import logging
5
+
6
+ logger = logging.getLogger("selectolax")
2
7
 
3
8
  _TAG_TO_NAME = {
4
- 0x0005: "- doctype",
9
+ 0x0005: "-doctype",
5
10
  0x0002: "-text",
6
11
  0x0004: "-comment",
7
12
  }
8
13
  ctypedef fused str_or_LexborNode:
9
- basestring
14
+ str
10
15
  bytes
11
16
  LexborNode
12
17
 
18
+ ctypedef fused str_or_bytes:
19
+ str
20
+ bytes
21
+
13
22
  cdef inline bytes to_bytes(str_or_LexborNode value):
14
23
  cdef bytes bytes_val
15
- if isinstance(value, (str, unicode)):
16
- bytes_val = value.encode(_ENCODING)
24
+ if isinstance(value, unicode):
25
+ bytes_val = <bytes>value.encode("utf-8")
17
26
  elif isinstance(value, bytes):
18
- bytes_val = <char*> value
27
+ bytes_val = <bytes>value
19
28
  return bytes_val
20
29
 
30
+
21
31
  @cython.final
22
32
  cdef class LexborNode:
23
33
  """A class that represents HTML node (element)."""
24
34
 
25
- cdef _cinit(self, lxb_dom_node_t *node, LexborHTMLParser parser):
26
- self.parser = parser
27
- self.node = node
28
- return self
35
+ @staticmethod
36
+ cdef LexborNode new(lxb_dom_node_t *node, LexborHTMLParser parser):
37
+ cdef LexborNode lxbnode = LexborNode.__new__(LexborNode)
38
+ lxbnode.node = node
39
+ lxbnode.parser = parser
40
+ return lxbnode
29
41
 
30
42
  @property
31
43
  def mem_id(self):
@@ -33,7 +45,10 @@ cdef class LexborNode:
33
45
 
34
46
  @property
35
47
  def child(self):
36
- """Alias for the `first_child` property."""
48
+ """Alias for the `first_child` property.
49
+
50
+ **Deprecated**. Please use `first_child` instead.
51
+ """
37
52
  return self.first_child
38
53
 
39
54
  @property
@@ -41,8 +56,7 @@ cdef class LexborNode:
41
56
  """Return the first child node."""
42
57
  cdef LexborNode node
43
58
  if self.node.first_child:
44
- node = LexborNode()
45
- node._cinit(<lxb_dom_node_t *> self.node.first_child, self.parser)
59
+ node = LexborNode.new(<lxb_dom_node_t *> self.node.first_child, self.parser)
46
60
  return node
47
61
  return None
48
62
 
@@ -50,9 +64,8 @@ cdef class LexborNode:
50
64
  def parent(self):
51
65
  """Return the parent node."""
52
66
  cdef LexborNode node
53
- if self.node.parent:
54
- node = LexborNode()
55
- node._cinit(<lxb_dom_node_t *> self.node.parent, self.parser)
67
+ if self.node.parent != NULL:
68
+ node = LexborNode.new(<lxb_dom_node_t *> self.node.parent, self.parser)
56
69
  return node
57
70
  return None
58
71
 
@@ -60,9 +73,8 @@ cdef class LexborNode:
60
73
  def next(self):
61
74
  """Return next node."""
62
75
  cdef LexborNode node
63
- if self.node.next:
64
- node = LexborNode()
65
- node._cinit(<lxb_dom_node_t *> self.node.next, self.parser)
76
+ if self.node.next != NULL:
77
+ node = LexborNode.new(<lxb_dom_node_t *> self.node.next, self.parser)
66
78
  return node
67
79
  return None
68
80
 
@@ -70,9 +82,8 @@ cdef class LexborNode:
70
82
  def prev(self):
71
83
  """Return previous node."""
72
84
  cdef LexborNode node
73
- if self.node.prev:
74
- node = LexborNode()
75
- node._cinit(<lxb_dom_node_t *> self.node.prev, self.parser)
85
+ if self.node.prev != NULL:
86
+ node = LexborNode.new(<lxb_dom_node_t *> self.node.prev, self.parser)
76
87
  return node
77
88
  return None
78
89
 
@@ -80,9 +91,8 @@ cdef class LexborNode:
80
91
  def last_child(self):
81
92
  """Return last child node."""
82
93
  cdef LexborNode node
83
- if self.node.last_child:
84
- node = LexborNode()
85
- node._cinit(<lxb_dom_node_t *> self.node.last_child, self.parser)
94
+ if self.node.last_child != NULL:
95
+ node = LexborNode.new(<lxb_dom_node_t *> self.node.last_child, self.parser)
86
96
  return node
87
97
  return None
88
98
 
@@ -181,6 +191,12 @@ cdef class LexborNode:
181
191
  Matches pattern `query` against HTML tree.
182
192
  `CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
183
193
 
194
+ Special selectors:
195
+
196
+ - parser.css('p:lexbor-contains("awesome" i)') -- case-insensitive contains
197
+ - parser.css('p:lexbor-contains("awesome")') -- case-sensitive contains
198
+
199
+
184
200
  Parameters
185
201
  ----------
186
202
  query : str
@@ -195,13 +211,15 @@ cdef class LexborNode:
195
211
  def css_first(self, str query, default=None, bool strict=False):
196
212
  """Same as `css` but returns only the first match.
197
213
 
214
+ When `strict=False`, the search stops at the first match, which is faster.
215
+
198
216
  Parameters
199
217
  ----------
200
218
 
201
219
  query : str
202
- default : bool, default None
220
+ default : Any, default None
203
221
  Default value to return if there is no match.
204
- strict: bool, default True
222
+ strict: bool, default False
205
223
  Set to True if you want to check if there is strictly only one match in the document.
206
224
 
207
225
 
@@ -209,8 +227,10 @@ cdef class LexborNode:
209
227
  -------
210
228
  selector : `LexborNode` object
211
229
  """
212
- # TODO: This can be improved.
213
- results = self.css(query)
230
+ if strict:
231
+ results = self.parser.selector.find(query, self)
232
+ else:
233
+ results = self.parser.selector.find_first(query, self)
214
234
  n_results = len(results)
215
235
  if n_results > 0:
216
236
  if strict and n_results > 1:
@@ -227,7 +247,7 @@ cdef class LexborNode:
227
247
 
228
248
  def css_matches(self, str selector):
229
249
  """Returns True if CSS selector matches a node."""
230
- return self.parser.selector.any_matches(selector, self)
250
+ return bool(self.parser.selector.any_matches(selector, self))
231
251
 
232
252
  def __repr__(self):
233
253
  return '<LexborNode %s>' % self.tag
@@ -241,6 +261,14 @@ cdef class LexborNode:
241
261
  def tag(self):
242
262
  """Return the name of the current tag (e.g. div, p, img).
243
263
 
264
+ For non-tag nodes, returns the following names:
265
+
266
+ * `-text` - text node
267
+ * `-document` - document node
268
+ * `-comment` - comment node
269
+
270
+ This
271
+
244
272
  Returns
245
273
  -------
246
274
  text : str
@@ -256,7 +284,6 @@ cdef class LexborNode:
256
284
  text = c_text.decode(_ENCODING)
257
285
  return text
258
286
 
259
-
260
287
  def decompose(self, bool recursive=True):
261
288
  """Remove the current node from the tree.
262
289
 
@@ -273,10 +300,13 @@ cdef class LexborNode:
273
300
  >>> tag.decompose()
274
301
 
275
302
  """
303
+ if self.node == <lxb_dom_node_t *> lxb_dom_document_root(&self.parser.document.dom_document):
304
+ raise SelectolaxError("Decomposing the root node is not allowed.")
305
+
276
306
  if recursive:
277
- lxb_dom_node_destroy_deep(<lxb_dom_node_t *> self.node)
307
+ node_remove_deep(<lxb_dom_node_t *> self.node)
278
308
  else:
279
- lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
309
+ lxb_dom_node_remove(<lxb_dom_node_t *> self.node)
280
310
 
281
311
  def strip_tags(self, list tags, bool recursive = False):
282
312
  """Remove specified tags from the HTML tree.
@@ -298,11 +328,11 @@ cdef class LexborNode:
298
328
  '<html><body><div>Hello world!</div></body></html>'
299
329
 
300
330
  """
331
+ cdef LexborNode element
301
332
  for tag in tags:
302
333
  for element in self.css(tag):
303
334
  element.decompose(recursive=recursive)
304
335
 
305
-
306
336
  @property
307
337
  def attributes(self):
308
338
  """Get all attributes that belong to the current node.
@@ -325,6 +355,9 @@ cdef class LexborNode:
325
355
  cdef size_t str_len = 0
326
356
  attributes = dict()
327
357
 
358
+ if self.node.type != LXB_DOM_NODE_TYPE_ELEMENT:
359
+ return attributes
360
+
328
361
  while attr != NULL:
329
362
  key = lxb_dom_attr_local_name_noi(attr, &str_len)
330
363
  value = lxb_dom_attr_value_noi(attr, &str_len)
@@ -410,15 +443,15 @@ cdef class LexborNode:
410
443
  node = node.next
411
444
  continue
412
445
 
413
- next_node = LexborNode()
414
- next_node._cinit(<lxb_dom_node_t *> node, self.parser)
446
+ next_node = LexborNode.new(<lxb_dom_node_t *> node, self.parser)
415
447
  yield next_node
416
448
  node = node.next
417
449
 
418
-
419
- def unwrap(self, delete_empty=False):
450
+ def unwrap(self, bint delete_empty=False):
420
451
  """Replace node with whatever is inside this node.
421
452
 
453
+ Does nothing if you unwrap the same node a second time.
454
+
422
455
  Parameters
423
456
  ----------
424
457
  delete_empty : bool, default False
@@ -431,15 +464,20 @@ cdef class LexborNode:
431
464
  >>> tree.css_first('i').unwrap()
432
465
  >>> tree.html
433
466
  '<html><head></head><body><div>Hello world!</div></body></html>'
434
-
467
+
435
468
  Note: by default, empty tags are ignored, use "delete_empty" to change this.
436
469
  """
470
+
471
+ if node_is_removed(<lxb_dom_node_t *> self.node) == 1:
472
+ logger.error("Attempt to unwrap removed node. Does nothing.")
473
+ return
474
+
437
475
  if self.node.first_child == NULL:
438
476
  if delete_empty:
439
- lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
477
+ lxb_dom_node_remove(<lxb_dom_node_t *> self.node)
440
478
  return
441
- cdef lxb_dom_node_t* next_node;
442
- cdef lxb_dom_node_t* current_node;
479
+ cdef lxb_dom_node_t* next_node
480
+ cdef lxb_dom_node_t* current_node
443
481
 
444
482
  if self.node.first_child.next != NULL:
445
483
  current_node = self.node.first_child
@@ -451,9 +489,9 @@ cdef class LexborNode:
451
489
  current_node = next_node
452
490
  else:
453
491
  lxb_dom_node_insert_before(self.node, self.node.first_child)
454
- lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
492
+ lxb_dom_node_remove(<lxb_dom_node_t *> self.node)
455
493
 
456
- def unwrap_tags(self, list tags, delete_empty = False):
494
+ def unwrap_tags(self, list tags, bint delete_empty = False):
457
495
  """Unwraps specified tags from the HTML tree.
458
496
 
459
497
  Works the same as the ``unwrap`` method, but applied to a list of tags.
@@ -472,14 +510,56 @@ cdef class LexborNode:
472
510
  >>> tree.body.unwrap_tags(['i','a'])
473
511
  >>> tree.body.html
474
512
  '<body><div>Hello world!</div></body>'
475
-
513
+
476
514
  Note: by default, empty tags are ignored, use "delete_empty" to change this.
477
515
  """
478
-
516
+ cdef LexborNode element
479
517
  for tag in tags:
480
518
  for element in self.css(tag):
481
519
  element.unwrap(delete_empty)
482
520
 
521
+ def merge_text_nodes(self):
522
+ """Iterates over all text nodes and merges text nodes that are adjacent to each other.
523
+
524
+ This is useful for text extraction.
525
+ Use it when you need to strip HTML tags and merge "dangling" text.
526
+
527
+ Examples
528
+ --------
529
+
530
+ >>> tree = LexborHTMLParser("<div><p><strong>J</strong>ohn</p><p>Doe</p></div>")
531
+ >>> node = tree.css_first('div')
532
+ >>> tree.unwrap_tags(["strong"])
533
+ >>> tree.text(deep=True, separator=" ", strip=True)
534
+ "J ohn Doe" # Text extraction produces an extra space because the strong tag was removed.
535
+ >>> node.merge_text_nodes()
536
+ >>> tree.text(deep=True, separator=" ", strip=True)
537
+ "John Doe"
538
+ """
539
+ cdef lxb_dom_node_t *node = self.node.first_child
540
+ cdef lxb_dom_node_t *next_node
541
+ cdef lxb_char_t *left_text
542
+ cdef lxb_char_t *right_text
543
+ cdef size_t left_length, right_length
544
+
545
+ while node != NULL:
546
+ next_node = node.next
547
+ if node.type == LXB_DOM_NODE_TYPE_TEXT and node.prev and node.prev.type == LXB_DOM_NODE_TYPE_TEXT:
548
+ left_text = lxb_dom_node_text_content(node.prev, &left_length)
549
+ right_text = lxb_dom_node_text_content(node, &right_length)
550
+ if left_text and right_text:
551
+ combined = (<bytes>left_text[:left_length]) + (<bytes>right_text[:right_length])
552
+ lxb_dom_node_text_content_set(node, combined, len(combined))
553
+ lxb_dom_node_remove(node.prev)
554
+
555
+ if left_text is not NULL:
556
+ lxb_dom_document_destroy_text_noi(self.node.owner_document, left_text)
557
+ if right_text is not NULL:
558
+ lxb_dom_document_destroy_text_noi(self.node.owner_document, right_text)
559
+
560
+ if node.first_child:
561
+ LexborNode.new(node, self.parser).merge_text_nodes()
562
+ node = next_node
483
563
 
484
564
  def traverse(self, include_text=False):
485
565
  """Iterate over all child and next nodes starting from the current level.
@@ -499,8 +579,7 @@ cdef class LexborNode:
499
579
 
500
580
  while node != NULL:
501
581
  if not (not include_text and node.type == LXB_DOM_NODE_TYPE_TEXT):
502
- lxb_node = LexborNode()
503
- lxb_node._cinit(<lxb_dom_node_t *> node, self.parser)
582
+ lxb_node = LexborNode.new(<lxb_dom_node_t *> node, self.parser)
504
583
  yield lxb_node
505
584
 
506
585
  if node.first_child != NULL:
@@ -550,7 +629,7 @@ cdef class LexborNode:
550
629
  if new_node == NULL:
551
630
  raise SelectolaxError("Can't create a new node")
552
631
  lxb_dom_node_insert_before(self.node, new_node)
553
- lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
632
+ lxb_dom_node_remove(<lxb_dom_node_t *> self.node)
554
633
  elif isinstance(value, LexborNode):
555
634
  new_node = lxb_dom_document_import_node(
556
635
  &self.parser.document.dom_document,
@@ -560,11 +639,10 @@ cdef class LexborNode:
560
639
  if new_node == NULL:
561
640
  raise SelectolaxError("Can't create a new node")
562
641
  lxb_dom_node_insert_before(self.node, <lxb_dom_node_t *> new_node)
563
- lxb_dom_node_destroy(<lxb_dom_node_t *> self.node)
642
+ lxb_dom_node_remove(<lxb_dom_node_t *> self.node)
564
643
  else:
565
644
  raise SelectolaxError("Expected a string or LexborNode instance, but %s found" % type(value).__name__)
566
645
 
567
-
568
646
  def insert_before(self, str_or_LexborNode value):
569
647
  """
570
648
  Insert a node before the current Node.
@@ -739,7 +817,7 @@ cdef class LexborNode:
739
817
  >>> selector.child.raw_value
740
818
  b'&#x3C;test&#x3E;'
741
819
  """
742
- raise SelectolaxError("This features is not supported by the lexbor backend. Please use Modest backend.")
820
+ raise NotImplementedError("This features is not supported by the lexbor backend. Please use Modest backend.")
743
821
 
744
822
  def scripts_contain(self, str query):
745
823
  """Returns True if any of the script tags contain specified text.
@@ -752,6 +830,7 @@ cdef class LexborNode:
752
830
  The query to check.
753
831
 
754
832
  """
833
+ cdef LexborNode node
755
834
  if self.parser.cached_script_texts is None:
756
835
  nodes = self.parser.selector.find('script', self)
757
836
  text_nodes = []
@@ -776,6 +855,7 @@ cdef class LexborNode:
776
855
  queries : tuple of str
777
856
 
778
857
  """
858
+ cdef LexborNode node
779
859
  if self.parser.cached_script_srcs is None:
780
860
  nodes = self.parser.selector.find('script', self)
781
861
  src_nodes = []
@@ -831,31 +911,99 @@ cdef class LexborNode:
831
911
  """
832
912
  cdef unsigned char * text
833
913
  cdef lxb_dom_node_t* node = <lxb_dom_node_t*> self.node.first_child
834
-
835
- container = TextContainer()
914
+ cdef TextContainer container
836
915
  if self.node == NULL or self.node.type != LXB_DOM_NODE_TYPE_TEXT:
837
916
  return None
917
+
838
918
  text = <unsigned char *> lexbor_str_data_noi(&(<lxb_dom_character_data_t *> self.node).data)
839
919
  if text != NULL:
920
+ container = TextContainer.new_with_defaults()
840
921
  py_text = text.decode(_ENCODING)
841
922
  container.append(py_text)
842
923
  return container.text
924
+
925
+ @property
926
+ def inner_html(self) -> str:
927
+ """Return HTML representation of the child nodes.
928
+
929
+ Works similar to innerHTML in JavaScript.
930
+ Unlike the `.html` property, does not include the current node.
931
+ Can be used to set HTML as well. See the setter docstring.
932
+
933
+ Returns
934
+ -------
935
+ text : str | None
936
+ """
937
+
938
+ cdef lexbor_str_t *lxb_str
939
+ cdef lxb_status_t status
940
+
941
+ lxb_str = lexbor_str_create()
942
+ status = lxb_html_serialize_deep_str(self.node, lxb_str)
943
+ if status == 0 and lxb_str.data:
944
+ html = lxb_str.data.decode(_ENCODING).replace('<-undef>', '')
945
+ lexbor_str_destroy(lxb_str, self.node.owner_document.text, True)
946
+ return html
947
+ return None
948
+
949
+ @inner_html.setter
950
+ def inner_html(self, str html):
951
+ """Set inner HTML to the specified HTML.
952
+
953
+ Replaces existing data inside the node.
954
+ Works similar to innerHTML in JavaScript.
955
+
956
+ Parameters
957
+ ----------
958
+ html : str | None
959
+
960
+ """
961
+ cdef bytes bytes_val
962
+ bytes_val = <bytes>html.encode("utf-8")
963
+ lxb_html_element_inner_html_set(
964
+ <lxb_html_element_t *>self.node,
965
+ <lxb_char_t *> bytes_val, len(bytes_val)
966
+ )
967
+
968
+ def clone(self) -> LexborNode:
969
+ """Clone the current node.
970
+
971
+ You can use it to make temporary modifications without affecting the original HTML tree.
972
+
973
+ It is tied to the current parser instance.
974
+ Gets destroyed when parser instance is destroyed.
975
+ """
976
+ cdef lxb_dom_node_t* node
977
+ node = lxb_dom_node_clone(<lxb_dom_node_t *> self.node, 1)
978
+ return LexborNode.new(node, self.parser)
979
+
980
+
981
+ @cython.internal
843
982
  @cython.final
844
983
  cdef class TextContainer:
845
984
  cdef str _text
846
- cdef public str separator
847
- cdef public bool strip
985
+ cdef str separator
986
+ cdef bint strip
987
+
988
+ @staticmethod
989
+ cdef TextContainer new_with_defaults():
990
+ cdef TextContainer cls = TextContainer.__new__(TextContainer)
991
+ cls._text = ''
992
+ cls.separator = ''
993
+ cls.strip = False
994
+ return cls
848
995
 
849
996
  def __init__(self, str separator = '', bool strip = False):
850
997
  self._text = ""
851
998
  self.separator = separator
852
999
  self.strip = strip
853
1000
 
854
- def append(self, node_text):
1001
+ def append(self, str node_text):
855
1002
  if self.strip:
856
1003
  self._text += node_text.strip() + self.separator
857
1004
  else:
858
1005
  self._text += node_text + self.separator
1006
+
859
1007
  @property
860
1008
  def text(self):
861
1009
  if self.separator and self._text and self._text.endswith(self.separator):
@@ -864,7 +1012,7 @@ cdef class TextContainer:
864
1012
 
865
1013
 
866
1014
  cdef lexbor_action_t text_callback(lxb_dom_node_t *node, void *ctx):
867
- cdef unsigned char *text;
1015
+ cdef unsigned char *text
868
1016
  cdef lxb_tag_id_t tag_id = lxb_dom_node_tag_id_noi(node)
869
1017
  if tag_id != LXB_TAG__TEXT:
870
1018
  return LEXBOR_ACTION_OK
@@ -872,8 +1020,15 @@ cdef lexbor_action_t text_callback(lxb_dom_node_t *node, void *ctx):
872
1020
  text = <unsigned char*> lexbor_str_data_noi(&(<lxb_dom_text_t *> node).char_data.data)
873
1021
  if not text:
874
1022
  return LEXBOR_ACTION_OK
875
- py_str = text.decode(_ENCODING)
876
- cdef object cls
877
- cls = <object> ctx
1023
+
1024
+ try:
1025
+ py_str = text.decode(_ENCODING, "replace")
1026
+
1027
+ except Exception as e:
1028
+ PyErr_SetNone(e)
1029
+ return LEXBOR_ACTION_STOP
1030
+
1031
+ cdef TextContainer cls
1032
+ cls = <TextContainer> ctx
878
1033
  cls.append(py_str)
879
1034
  return LEXBOR_ACTION_OK
@@ -0,0 +1,29 @@
1
+
2
+ cdef lxb_dom_node_t * node_remove_deep(lxb_dom_node_t* root):
3
+ cdef lxb_dom_node_t *tmp
4
+ cdef lxb_dom_node_t *node = root
5
+
6
+ while node != NULL:
7
+ if node.first_child != NULL:
8
+ node = node.first_child
9
+ else:
10
+ while node != root and node.next == NULL:
11
+ tmp = node.parent
12
+ lxb_dom_node_remove(node)
13
+ node = tmp
14
+
15
+ if node == root:
16
+ lxb_dom_node_remove(node)
17
+ break
18
+
19
+ tmp = node.next
20
+ lxb_dom_node_remove(node)
21
+ node = tmp
22
+
23
+ return NULL
24
+
25
+ cdef bint node_is_removed(lxb_dom_node_t* node):
26
+ if node.parent == NULL and node.next == NULL \
27
+ and node.prev == NULL:
28
+ return 1
29
+ return 0