selectolax 0.3.28__cp310-cp310-musllinux_1_2_aarch64.whl → 0.4.0__cp310-cp310-musllinux_1_2_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

selectolax/parser.pyi CHANGED
@@ -1,8 +1,10 @@
1
- from typing import Any, Iterator, TypeVar, Literal, overload
1
+ from typing import Iterator, Literal, TypeVar, overload
2
2
 
3
3
  DefaultT = TypeVar("DefaultT")
4
4
 
5
5
  class _Attributes:
6
+ """A dict-like object that represents attributes."""
7
+
6
8
  @staticmethod
7
9
  def create(node: Node, decode_errors: str) -> _Attributes: ...
8
10
  def keys(self) -> Iterator[str]: ...
@@ -22,7 +24,9 @@ class _Attributes:
22
24
  @overload
23
25
  def sget(self, key: str, default: str | DefaultT) -> str | DefaultT: ...
24
26
  @overload
25
- def sget(self, key: str, default: str = "") -> str: ...
27
+ def sget(self, key: str, default: str = "") -> str:
28
+ """Same as get, but returns empty strings instead of None values for empty attributes."""
29
+ ...
26
30
 
27
31
  class Selector:
28
32
  """An advanced CSS selector that supports additional operations.
@@ -69,51 +73,140 @@ class Selector:
69
73
  ...
70
74
 
71
75
  class Node:
76
+ """A class that represents HTML node (element)."""
77
+
72
78
  parser: HTMLParser
73
79
  @property
74
80
  def attributes(self) -> dict[str, str | None]:
75
81
  """Get all attributes that belong to the current node.
76
82
 
77
- The value of empty attributes is None."""
83
+ The value of empty attributes is None.
84
+
85
+ Returns
86
+ -------
87
+ attributes : dictionary of all attributes.
88
+
89
+ Examples
90
+ --------
91
+
92
+ >>> tree = HTMLParser("<div data id='my_id'></div>")
93
+ >>> node = tree.css_first('div')
94
+ >>> node.attributes
95
+ {'data': None, 'id': 'my_id'}
96
+ """
78
97
  ...
79
98
  @property
80
99
  def attrs(self) -> _Attributes:
81
- """A dict-like object that is similar to the attributes property, but operates directly on the Node data."""
100
+ """A dict-like object that is similar to the ``attributes`` property, but operates directly on the Node data.
101
+
102
+ .. warning:: Use ``attributes`` instead, if you don't want to modify Node attributes.
103
+
104
+ Returns
105
+ -------
106
+ attributes : Attributes mapping object.
107
+
108
+ Examples
109
+ --------
110
+
111
+ >>> tree = HTMLParser("<div id='a'></div>")
112
+ >>> node = tree.css_first('div')
113
+ >>> node.attrs
114
+ <div attributes, 1 items>
115
+ >>> node.attrs['id']
116
+ 'a'
117
+ >>> node.attrs['foo'] = 'bar'
118
+ >>> del node.attrs['id']
119
+ >>> node.attributes
120
+ {'foo': 'bar'}
121
+ >>> node.attrs['id'] = 'new_id'
122
+ >>> node.html
123
+ '<div foo="bar" id="new_id"></div>'
124
+ """
82
125
  ...
83
126
  @property
84
127
  def id(self) -> str | None:
85
128
  """Get the id attribute of the node.
86
129
 
87
- Returns None if id does not set."""
130
+ Returns None if the id is not set.
131
+
132
+ Returns
133
+ -------
134
+ text : str or None
135
+ """
88
136
  ...
89
137
 
90
138
  def mem_id(self) -> int:
91
- """Get the mem_id of the node.
139
+ """Get the mem_id attribute of the node.
92
140
 
93
- Returns 0 if mem_id does not set."""
141
+ Returns
142
+ -------
143
+ text : int
144
+ """
94
145
  ...
95
146
 
96
147
  def __hash__(self) -> int:
97
- """ Get the hash of this node
148
+ """Get the hash of this node
98
149
  :return: int
99
150
  """
100
151
  ...
101
152
  def text(self, deep: bool = True, separator: str = "", strip: bool = False) -> str:
102
- """Returns the text of the node including text of all its child nodes."""
153
+ """Returns the text of the node including text of all its child nodes.
154
+
155
+ Parameters
156
+ ----------
157
+ strip : bool, default False
158
+ If true, calls ``str.strip()`` on each text part to remove extra white spaces.
159
+ separator : str, default ''
160
+ The separator to use when joining text from different nodes.
161
+ deep : bool, default True
162
+ If True, includes text from all child nodes.
163
+
164
+ Returns
165
+ -------
166
+ text : str
167
+ """
103
168
  ...
104
169
  def iter(self, include_text: bool = False) -> Iterator[Node]:
105
- """Iterate over nodes on the current level."""
170
+ """Iterate over nodes on the current level.
171
+
172
+ Parameters
173
+ ----------
174
+ include_text : bool
175
+ If True, includes text nodes as well.
176
+
177
+ Yields
178
+ -------
179
+ node
180
+ """
106
181
  ...
107
182
  def traverse(self, include_text: bool = False) -> Iterator[Node]:
108
- """Iterate over all child and next nodes starting from the current level."""
183
+ """Iterate over all child and next nodes starting from the current level.
184
+
185
+ Parameters
186
+ ----------
187
+ include_text : bool
188
+ If True, includes text nodes as well.
189
+
190
+ Yields
191
+ -------
192
+ node
193
+ """
109
194
  ...
110
195
  @property
111
196
  def tag(self) -> str:
112
- """Return the name of the current tag (e.g. div, p, img)."""
197
+ """Return the name of the current tag (e.g. div, p, img).
198
+
199
+ Returns
200
+ -------
201
+ text : str
202
+ """
113
203
  ...
114
204
  @property
115
205
  def child(self) -> Node | None:
116
- """Return the child node."""
206
+ """Alias for the `first_child` property.
207
+
208
+ **Deprecated**. Please use `first_child` instead.
209
+ """
117
210
  ...
118
211
  @property
119
212
  def parent(self) -> Node | None:
@@ -133,7 +226,12 @@ class Node:
133
226
  ...
134
227
  @property
135
228
  def html(self) -> str | None:
136
- """Return HTML representation of the current node including all its child nodes."""
229
+ """Return HTML representation of the current node including all its child nodes.
230
+
231
+ Returns
232
+ -------
233
+ text : str or None
234
+ """
137
235
  ...
138
236
  def css(self, query: str) -> list[Node]:
139
237
  """Evaluate CSS selector against current node and its child nodes."""
@@ -145,84 +243,315 @@ class Node:
145
243
  """Returns True if CSS selector matches a node."""
146
244
  ...
147
245
  @overload
148
- def css_first(
149
- self, query: str, default: Any = ..., strict: Literal[True] = ...
150
- ) -> Node: ...
151
- @overload
152
246
  def css_first(
153
247
  self, query: str, default: DefaultT, strict: bool = False
154
248
  ) -> Node | DefaultT: ...
155
249
  @overload
156
250
  def css_first(
157
- self, query: str, default: None = ..., strict: bool = False
158
- ) -> Node | None:
251
+ self, query: str, default: None = None, strict: bool = False
252
+ ) -> Node | None | DefaultT:
159
253
  """Evaluate CSS selector against current node and its child nodes."""
160
254
  ...
161
255
  def decompose(self, recursive: bool = True) -> None:
162
- """Remove a Node from the tree."""
256
+ """Remove a Node from the tree.
257
+
258
+ Parameters
259
+ ----------
260
+ recursive : bool, default True
261
+ Whether to delete all its child nodes.
262
+
263
+ Examples
264
+ --------
265
+
266
+ >>> tree = HTMLParser(html)
267
+ >>> for tag in tree.css('script'):
268
+ >>> tag.decompose()
269
+ """
163
270
  ...
164
271
  def remove(self, recursive: bool = True) -> None:
165
272
  """An alias for the decompose method."""
166
273
  ...
167
- def unwrap(self) -> None:
168
- """Replace node with whatever is inside this node."""
274
+ def unwrap(self, delete_empty: bool = False) -> None:
275
+ """Replace node with whatever is inside this node.
276
+
277
+ Parameters
278
+ ----------
279
+ delete_empty : bool, default False
280
+ Whether to delete empty tags.
281
+
282
+ Examples
283
+ --------
284
+
285
+ >>> tree = HTMLParser("<div>Hello <i>world</i>!</div>")
286
+ >>> tree.css_first('i').unwrap()
287
+ >>> tree.html
288
+ '<html><head></head><body><div>Hello world!</div></body></html>'
289
+
290
+ Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
291
+ """
169
292
  ...
170
293
  def strip_tags(self, tags: list[str], recursive: bool = False) -> None:
171
- """Remove specified tags from the HTML tree."""
294
+ """Remove specified tags from the HTML tree.
295
+
296
+ Parameters
297
+ ----------
298
+ tags : list
299
+ List of tags to remove.
300
+ recursive : bool, default False
301
+ Whether to delete all its child nodes.
302
+
303
+ Examples
304
+ --------
305
+
306
+ >>> tree = HTMLParser('<html><head></head><body><script></script><div>Hello world!</div></body></html>')
307
+ >>> tags = ['head', 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes']
308
+ >>> tree.strip_tags(tags)
309
+ >>> tree.html
310
+ '<html><body><div>Hello world!</div></body></html>'
311
+ """
172
312
  ...
173
- def unwrap_tags(self, tags: list[str]) -> None:
313
+ def unwrap_tags(self, tags: list[str], delete_empty: bool = False) -> None:
174
314
  """Unwraps specified tags from the HTML tree.
175
315
 
176
- Works the same as the unwrap method, but applied to a list of tags."""
316
+ Works the same as the unwrap method, but applied to a list of tags.
317
+
318
+ Parameters
319
+ ----------
320
+ tags : list
321
+ List of tags to remove.
322
+ delete_empty : bool, default False
323
+ Whether to delete empty tags.
324
+
325
+ Examples
326
+ --------
327
+
328
+ >>> tree = HTMLParser('<div><a href="">Hello</a> <i>world</i>!</div>')
329
+ >>> tree.body.unwrap_tags(['i','a'])
330
+ >>> tree.body.html
331
+ '<body><div>Hello world!</div></body>'
332
+
333
+ Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
334
+ """
177
335
  ...
178
336
  def replace_with(self, value: str | bytes | None) -> None:
179
- """Replace current Node with specified value."""
337
+ """Replace current Node with specified value.
338
+
339
+ Parameters
340
+ ----------
341
+ value : str, bytes or Node
342
+ The text or Node instance to replace the Node with.
343
+ When a text string is passed, it's treated as text. All HTML tags will be escaped.
344
+ Convert and pass the ``Node`` object when you want to work with HTML.
345
+ Does not clone the ``Node`` object.
346
+ All future changes to the passed ``Node`` object will also be taken into account.
347
+
348
+ Examples
349
+ --------
350
+
351
+ >>> tree = HTMLParser('<div>Get <img src="" alt="Laptop"></div>')
352
+ >>> img = tree.css_first('img')
353
+ >>> img.replace_with(img.attributes.get('alt', ''))
354
+ >>> tree.body.child.html
355
+ '<div>Get Laptop</div>'
356
+
357
+ >>> html_parser = HTMLParser('<div>Get <span alt="Laptop"><img src="/jpg"> <div></div></span></div>')
358
+ >>> html_parser2 = HTMLParser('<div>Test</div>')
359
+ >>> img_node = html_parser.css_first('img')
360
+ >>> img_node.replace_with(html_parser2.body.child)
361
+ '<div>Get <span alt="Laptop"><div>Test</div> <div></div></span></div>'
362
+ """
180
363
  ...
181
364
  def insert_before(self, value: str | bytes | None) -> None:
182
- """Insert a node before the current Node."""
365
+ """Insert a node before the current Node.
366
+
367
+ Parameters
368
+ ----------
369
+ value : str, bytes or Node
370
+ The text or Node instance to insert before the Node.
371
+ When a text string is passed, it's treated as text. All HTML tags will be escaped.
372
+ Convert and pass the ``Node`` object when you want to work with HTML.
373
+ Does not clone the ``Node`` object.
374
+ All future changes to the passed ``Node`` object will also be taken into account.
375
+
376
+ Examples
377
+ --------
378
+
379
+ >>> tree = HTMLParser('<div>Get <img src="" alt="Laptop"></div>')
380
+ >>> img = tree.css_first('img')
381
+ >>> img.insert_before(img.attributes.get('alt', ''))
382
+ >>> tree.body.child.html
383
+ '<div>Get Laptop<img src="" alt="Laptop"></div>'
384
+
385
+ >>> html_parser = HTMLParser('<div>Get <span alt="Laptop"><img src="/jpg"> <div></div></span></div>')
386
+ >>> html_parser2 = HTMLParser('<div>Test</div>')
387
+ >>> img_node = html_parser.css_first('img')
388
+ >>> img_node.insert_before(html_parser2.body.child)
389
+ '<div>Get <span alt="Laptop"><div>Test</div><img src="/jpg"> <div></div></span></div>'
390
+ """
183
391
  ...
184
392
  def insert_after(self, value: str | bytes | None) -> None:
185
- """Insert a node after the current Node."""
393
+ """Insert a node after the current Node.
394
+
395
+ Parameters
396
+ ----------
397
+ value : str, bytes or Node
398
+ The text or Node instance to insert after the Node.
399
+ When a text string is passed, it's treated as text. All HTML tags will be escaped.
400
+ Convert and pass the ``Node`` object when you want to work with HTML.
401
+ Does not clone the ``Node`` object.
402
+ All future changes to the passed ``Node`` object will also be taken into account.
403
+
404
+ Examples
405
+ --------
406
+
407
+ >>> tree = HTMLParser('<div>Get <img src="" alt="Laptop"></div>')
408
+ >>> img = tree.css_first('img')
409
+ >>> img.insert_after(img.attributes.get('alt', ''))
410
+ >>> tree.body.child.html
411
+ '<div>Get <img src="" alt="Laptop">Laptop</div>'
412
+
413
+ >>> html_parser = HTMLParser('<div>Get <span alt="Laptop"><img src="/jpg"> <div></div></span></div>')
414
+ >>> html_parser2 = HTMLParser('<div>Test</div>')
415
+ >>> img_node = html_parser.css_first('img')
416
+ >>> img_node.insert_after(html_parser2.body.child)
417
+ '<div>Get <span alt="Laptop"><img src="/jpg"><div>Test</div> <div></div></span></div>'
418
+ """
186
419
  ...
187
420
  def insert_child(self, value: str | bytes | None) -> None:
188
- """Insert a node inside (at the end of) the current Node.."""
421
+ """Insert a node inside (at the end of) the current Node.
422
+
423
+ Parameters
424
+ ----------
425
+ value : str, bytes or Node
426
+ The text or Node instance to insert inside the Node.
427
+ When a text string is passed, it's treated as text. All HTML tags will be escaped.
428
+ Convert and pass the ``Node`` object when you want to work with HTML.
429
+ Does not clone the ``Node`` object.
430
+ All future changes to the passed ``Node`` object will also be taken into account.
431
+
432
+ Examples
433
+ --------
434
+
435
+ >>> tree = HTMLParser('<div>Get <img src=""></div>')
436
+ >>> div = tree.css_first('div')
437
+ >>> div.insert_child('Laptop')
438
+ >>> tree.body.child.html
439
+ '<div>Get <img src="">Laptop</div>'
440
+
441
+ >>> html_parser = HTMLParser('<div>Get <span alt="Laptop"> <div>Laptop</div> </span></div>')
442
+ >>> html_parser2 = HTMLParser('<div>Test</div>')
443
+ >>> span_node = html_parser.css_first('span')
444
+ >>> span_node.insert_child(html_parser2.body.child)
445
+ '<div>Get <span alt="Laptop"> <div>Laptop</div> <div>Test</div> </span></div>'
446
+ """
189
447
  ...
190
448
  @property
191
449
  def raw_value(self) -> bytes:
192
450
  """Return the raw (unparsed, original) value of a node.
193
451
 
194
- Currently, works on text nodes only."""
452
+ Currently, works on text nodes only.
453
+
454
+ Returns
455
+ -------
456
+
457
+ raw_value : bytes
458
+
459
+ Examples
460
+ --------
461
+
462
+ >>> html_parser = HTMLParser('<div>&#x3C;test&#x3E;</div>')
463
+ >>> selector = html_parser.css_first('div')
464
+ >>> selector.child.html
465
+ '&lt;test&gt;'
466
+ >>> selector.child.raw_value
467
+ b'&#x3C;test&#x3E;'
468
+ """
195
469
  ...
196
470
  def select(self, query: str | None = None) -> Selector:
197
471
  """Select nodes given a CSS selector.
198
472
 
199
473
  Works similarly to the css method, but supports chained filtering and extra features.
474
+
475
+ Parameters
476
+ ----------
477
+ query : str or None
478
+ The CSS selector to use when searching for nodes.
479
+
480
+ Returns
481
+ -------
482
+ selector : The `Selector` class.
200
483
  """
201
484
  ...
202
485
  def scripts_contain(self, query: str) -> bool:
203
486
  """Returns True if any of the script tags contain specified text.
204
487
 
205
- Caches script tags on the first call to improve performance."""
488
+ Caches script tags on the first call to improve performance.
489
+
490
+ Parameters
491
+ ----------
492
+ query : str
493
+ The query to check.
494
+ """
206
495
  ...
207
496
  def script_srcs_contain(self, queries: tuple[str]) -> bool:
208
497
  """Returns True if any of the script SRCs attributes contain one of the specified texts.
209
498
 
210
- Caches values on the first call to improve performance."""
499
+ Caches values on the first call to improve performance.
500
+
501
+ Parameters
502
+ ----------
503
+ queries : tuple of str
504
+ """
211
505
  ...
212
506
  @property
213
507
  def text_content(self) -> str | None:
214
508
  """Returns the text of the node if it is a text node.
215
509
 
216
- Returns None for other nodes. Unlike the text method, does not include child nodes.
510
+ Returns None for other nodes.
511
+ Unlike the ``text`` method, does not include child nodes.
512
+
513
+ Returns
514
+ -------
515
+ text : str or None.
217
516
  """
218
517
  ...
219
518
  def merge_text_nodes(self):
220
519
  """Iterates over all text nodes and merges all text nodes that are close to each other.
221
520
 
222
- This is useful for text extraction."""
521
+ This is useful for text extraction.
522
+ Use it when you need to strip HTML tags and merge "dangling" text.
523
+
524
+ Examples
525
+ --------
526
+
527
+ >>> tree = HTMLParser("<div><p><strong>J</strong>ohn</p><p>Doe</p></div>")
528
+ >>> node = tree.css_first('div')
529
+ >>> tree.unwrap_tags(["strong"])
530
+ >>> tree.text(deep=True, separator=" ", strip=True)
531
+ "J ohn Doe" # Text extraction produces an extra space because the strong tag was removed.
532
+ >>> node.merge_text_nodes()
533
+ >>> tree.text(deep=True, separator=" ", strip=True)
534
+ "John Doe"
535
+ """
223
536
  ...
224
537
 
225
538
  class HTMLParser:
539
+ """The HTML parser.
540
+
541
+ Use this class to parse raw HTML.
542
+
543
+ Parameters
544
+ ----------
545
+
546
+ html : str (unicode) or bytes
547
+ detect_encoding : bool, default True
548
+ If `True` and html type is `bytes` then encoding will be detected automatically.
549
+ use_meta_tags : bool, default True
550
+ Whether to use meta tags in encoding detection process.
551
+ decode_errors : str, default 'ignore'
552
+ Same as in builtin's str.decode, i.e 'strict', 'ignore' or 'replace'.
553
+ """
554
+
226
555
  def __init__(
227
556
  self,
228
557
  html: bytes | str,
@@ -233,27 +562,50 @@ class HTMLParser:
233
562
  def css(self, query: str) -> list[Node]:
234
563
  """A CSS selector.
235
564
 
236
- Matches pattern query against HTML tree."""
565
+ Matches pattern `query` against HTML tree.
566
+ `CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
567
+
568
+ Parameters
569
+ ----------
570
+ query : str
571
+ CSS selector (e.g. "div > :nth-child(2n+1):not(:has(a))").
572
+
573
+ Returns
574
+ -------
575
+ selector : list of `Node` objects
576
+ """
237
577
  ...
238
578
  @overload
239
- def css_first(
240
- self, query: str, default: Any = ..., strict: Literal[True] = ...
241
- ) -> Node: ...
242
- @overload
243
579
  def css_first(
244
580
  self, query: str, default: DefaultT, strict: bool = False
245
581
  ) -> Node | DefaultT: ...
246
582
  @overload
247
583
  def css_first(
248
- self, query: str, default: None = ..., strict: bool = False
249
- ) -> Node | None:
250
- """Same as css but returns only the first match."""
584
+ self, query: str, default: None = None, strict: bool = False
585
+ ) -> Node | None | DefaultT:
586
+ """Same as `css` but returns only the first match.
587
+
588
+ Parameters
589
+ ----------
590
+
591
+ query : str
592
+ default : Any, default None
593
+ Default value to return if there is no match.
594
+ strict : bool, default False
595
+ Set to True if you want to check if there is strictly only one match in the document.
596
+
597
+
598
+ Returns
599
+ -------
600
+ selector : `Node` object
601
+ """
251
602
  ...
252
603
  @property
253
604
  def input_encoding(self) -> str:
254
605
  """Return encoding of the HTML document.
255
606
 
256
- Returns unknown in case the encoding is not determined."""
607
+ Returns `unknown` in case the encoding is not determined.
608
+ """
257
609
  ...
258
610
  @property
259
611
  def root(self) -> Node | None:
@@ -268,16 +620,70 @@ class HTMLParser:
268
620
  """Returns document body."""
269
621
  ...
270
622
  def tags(self, name: str) -> list[Node]:
271
- """Returns a list of tags that match specified name."""
623
+ """Returns a list of tags that match specified name.
624
+
625
+ Parameters
626
+ ----------
627
+ name : str (e.g. div)
628
+ """
272
629
  ...
273
630
  def text(self, deep: bool = True, separator: str = "", strip: bool = False) -> str:
274
- """Returns the text of the node including text of all its child nodes."""
631
+ """Returns the text of the node including text of all its child nodes.
632
+
633
+ Parameters
634
+ ----------
635
+ strip : bool, default False
636
+ If true, calls ``str.strip()`` on each text part to remove extra white spaces.
637
+ separator : str, default ''
638
+ The separator to use when joining text from different nodes.
639
+ deep : bool, default True
640
+ If True, includes text from all child nodes.
641
+
642
+ Returns
643
+ -------
644
+ text : str
645
+ """
646
+ ...
647
+ def strip_tags(self, tags: list[str], recursive: bool = False) -> None:
648
+ """Remove specified tags from the node.
649
+
650
+ Parameters
651
+ ----------
652
+ tags : list of str
653
+ List of tags to remove.
654
+ recursive : bool, default False
655
+ Whether to delete all its child nodes.
656
+
657
+ Examples
658
+ --------
659
+
660
+ >>> tree = HTMLParser('<html><head></head><body><script></script><div>Hello world!</div></body></html>')
661
+ >>> tags = ['head', 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes']
662
+ >>> tree.strip_tags(tags)
663
+ >>> tree.html
664
+ '<html><body><div>Hello world!</div></body></html>'
665
+ """
275
666
  ...
276
- def strip_tags(self, tags: list[str], recursive: bool = False) -> None: ...
277
- def unwrap_tags(self, tags: list[str]) -> None:
667
+ def unwrap_tags(self, tags: list[str], delete_empty: bool = False) -> None:
278
668
  """Unwraps specified tags from the HTML tree.
279
669
 
280
- Works the same as th unwrap method, but applied to a list of tags."""
670
+ Works the same as the unwrap method, but applied to a list of tags.
671
+
672
+ Parameters
673
+ ----------
674
+ tags : list
675
+ List of tags to remove.
676
+ delete_empty : bool, default False
677
+ If True, removes empty tags.
678
+
679
+ Examples
680
+ --------
681
+
682
+ >>> tree = HTMLParser('<div><a href="">Hello</a> <i>world</i>!</div>')
683
+ >>> tree.body.unwrap_tags(['i','a'])
684
+ >>> tree.body.html
685
+ '<body><div>Hello world!</div></body>'
686
+ """
281
687
  ...
282
688
  @property
283
689
  def html(self) -> str | None:
@@ -286,7 +692,16 @@ class HTMLParser:
286
692
  def select(self, query: str | None = None) -> Selector | None:
287
693
  """Select nodes given a CSS selector.
288
694
 
289
- Works similarly to the css method, but supports chained filtering and extra features.
695
+ Works similarly to the ``css`` method, but supports chained filtering and extra features.
696
+
697
+ Parameters
698
+ ----------
699
+ query : str or None
700
+ The CSS selector to use when searching for nodes.
701
+
702
+ Returns
703
+ -------
704
+ selector : The `Selector` class.
290
705
  """
291
706
  ...
292
707
  def any_css_matches(self, selectors: tuple[str]) -> bool:
@@ -295,12 +710,23 @@ class HTMLParser:
295
710
  def scripts_contain(self, query: str) -> bool:
296
711
  """Returns True if any of the script tags contain specified text.
297
712
 
298
- Caches script tags on the first call to improve performance."""
713
+ Caches script tags on the first call to improve performance.
714
+
715
+ Parameters
716
+ ----------
717
+ query : str
718
+ The query to check.
719
+ """
299
720
  ...
300
721
  def scripts_srcs_contain(self, queries: tuple[str]) -> bool:
301
722
  """Returns True if any of the script SRCs attributes contain one of the specified texts.
302
723
 
303
- Caches values on the first call to improve performance."""
724
+ Caches values on the first call to improve performance.
725
+
726
+ Parameters
727
+ ----------
728
+ queries : tuple of str
729
+ """
304
730
  ...
305
731
  def css_matches(self, selector: str) -> bool: ...
306
732
  def clone(self) -> HTMLParser:
@@ -309,7 +735,21 @@ class HTMLParser:
309
735
  def merge_text_nodes(self):
310
736
  """Iterates over all text nodes and merges all text nodes that are close to each other.
311
737
 
312
- This is useful for text extraction."""
738
+ This is useful for text extraction.
739
+ Use it when you need to strip HTML tags and merge "dangling" text.
740
+
741
+ Examples
742
+ --------
743
+
744
+ >>> tree = HTMLParser("<div><p><strong>J</strong>ohn</p><p>Doe</p></div>")
745
+ >>> node = tree.css_first('div')
746
+ >>> tree.unwrap_tags(["strong"])
747
+ >>> tree.text(deep=True, separator=" ", strip=True)
748
+ "J ohn Doe" # Text extraction produces an extra space because the strong tag was removed.
749
+ >>> node.merge_text_nodes()
750
+ >>> tree.text(deep=True, separator=" ", strip=True)
751
+ "John Doe"
752
+ """
313
753
  ...
314
754
 
315
755
  def create_tag(tag: str) -> Node: