selectolax 0.3.29__cp312-cp312-win32.whl → 0.3.34__cp312-cp312-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of selectolax might be problematic. Click here for more details.

selectolax/parser.pyi CHANGED
@@ -1,8 +1,10 @@
1
- from typing import Iterator, TypeVar, Literal, overload
1
+ from typing import Iterator, Literal, TypeVar, overload
2
2
 
3
3
  DefaultT = TypeVar("DefaultT")
4
4
 
5
5
  class _Attributes:
6
+ """A dict-like object that represents attributes."""
7
+
6
8
  @staticmethod
7
9
  def create(node: Node, decode_errors: str) -> _Attributes: ...
8
10
  def keys(self) -> Iterator[str]: ...
@@ -22,7 +24,9 @@ class _Attributes:
22
24
  @overload
23
25
  def sget(self, key: str, default: str | DefaultT) -> str | DefaultT: ...
24
26
  @overload
25
- def sget(self, key: str, default: str = "") -> str: ...
27
+ def sget(self, key: str, default: str = "") -> str:
28
+ """Same as get, but returns empty strings instead of None values for empty attributes."""
29
+ ...
26
30
 
27
31
  class Selector:
28
32
  """An advanced CSS selector that supports additional operations.
@@ -69,47 +73,133 @@ class Selector:
69
73
  ...
70
74
 
71
75
  class Node:
76
+ """A class that represents HTML node (element)."""
77
+
72
78
  parser: HTMLParser
73
79
  @property
74
80
  def attributes(self) -> dict[str, str | None]:
75
81
  """Get all attributes that belong to the current node.
76
82
 
77
- The value of empty attributes is None."""
83
+ The value of empty attributes is None.
84
+
85
+ Returns
86
+ -------
87
+ attributes : dictionary of all attributes.
88
+
89
+ Examples
90
+ --------
91
+
92
+ >>> tree = HTMLParser("<div data id='my_id'></div>")
93
+ >>> node = tree.css_first('div')
94
+ >>> node.attributes
95
+ {'data': None, 'id': 'my_id'}
96
+ """
78
97
  ...
79
98
  @property
80
99
  def attrs(self) -> _Attributes:
81
- """A dict-like object that is similar to the attributes property, but operates directly on the Node data."""
100
+ """A dict-like object that is similar to the ``attributes`` property, but operates directly on the Node data.
101
+
102
+ .. warning:: Use ``attributes`` instead, if you don't want to modify Node attributes.
103
+
104
+ Returns
105
+ -------
106
+ attributes : Attributes mapping object.
107
+
108
+ Examples
109
+ --------
110
+
111
+ >>> tree = HTMLParser("<div id='a'></div>")
112
+ >>> node = tree.css_first('div')
113
+ >>> node.attrs
114
+ <div attributes, 1 items>
115
+ >>> node.attrs['id']
116
+ 'a'
117
+ >>> node.attrs['foo'] = 'bar'
118
+ >>> del node.attrs['id']
119
+ >>> node.attributes
120
+ {'foo': 'bar'}
121
+ >>> node.attrs['id'] = 'new_id'
122
+ >>> node.html
123
+ '<div foo="bar" id="new_id"></div>'
124
+ """
82
125
  ...
83
126
  @property
84
127
  def id(self) -> str | None:
85
128
  """Get the id attribute of the node.
86
129
 
87
- Returns None if id does not set."""
130
+ Returns None if the id is not set.
131
+
132
+ Returns
133
+ -------
134
+ text : str
135
+ """
88
136
  ...
89
137
 
90
138
  def mem_id(self) -> int:
91
- """Get the mem_id of the node.
139
+ """Get the mem_id attribute of the node.
92
140
 
93
- Returns 0 if mem_id does not set."""
141
+ Returns
142
+ -------
143
+ text : int
144
+ """
94
145
  ...
95
146
 
96
147
  def __hash__(self) -> int:
97
- """ Get the hash of this node
148
+ """Get the hash of this node
98
149
  :return: int
99
150
  """
100
151
  ...
101
152
  def text(self, deep: bool = True, separator: str = "", strip: bool = False) -> str:
102
- """Returns the text of the node including text of all its child nodes."""
153
+ """Returns the text of the node including text of all its child nodes.
154
+
155
+ Parameters
156
+ ----------
157
+ strip : bool, default False
158
+ If true, calls ``str.strip()`` on each text part to remove extra white spaces.
159
+ separator : str, default ''
160
+ The separator to use when joining text from different nodes.
161
+ deep : bool, default True
162
+ If True, includes text from all child nodes.
163
+
164
+ Returns
165
+ -------
166
+ text : str
167
+ """
103
168
  ...
104
169
  def iter(self, include_text: bool = False) -> Iterator[Node]:
105
- """Iterate over nodes on the current level."""
170
+ """Iterate over nodes on the current level.
171
+
172
+ Parameters
173
+ ----------
174
+ include_text : bool
175
+ If True, includes text nodes as well.
176
+
177
+ Yields
178
+ -------
179
+ node
180
+ """
106
181
  ...
107
182
  def traverse(self, include_text: bool = False) -> Iterator[Node]:
108
- """Iterate over all child and next nodes starting from the current level."""
183
+ """Iterate over all child and next nodes starting from the current level.
184
+
185
+ Parameters
186
+ ----------
187
+ include_text : bool
188
+ If True, includes text nodes as well.
189
+
190
+ Yields
191
+ -------
192
+ node
193
+ """
109
194
  ...
110
195
  @property
111
196
  def tag(self) -> str:
112
- """Return the name of the current tag (e.g. div, p, img)."""
197
+ """Return the name of the current tag (e.g. div, p, img).
198
+
199
+ Returns
200
+ -------
201
+ text : str
202
+ """
113
203
  ...
114
204
  @property
115
205
  def child(self) -> Node | None:
@@ -133,7 +223,12 @@ class Node:
133
223
  ...
134
224
  @property
135
225
  def html(self) -> str | None:
136
- """Return HTML representation of the current node including all its child nodes."""
226
+ """Return HTML representation of the current node including all its child nodes.
227
+
228
+ Returns
229
+ -------
230
+ text : str
231
+ """
137
232
  ...
138
233
  def css(self, query: str) -> list[Node]:
139
234
  """Evaluate CSS selector against current node and its child nodes."""
@@ -146,79 +241,314 @@ class Node:
146
241
  ...
147
242
  @overload
148
243
  def css_first(
149
- self, query: str, default: DefaultT, strict: bool = False
150
- ) -> Node | DefaultT:
151
- ...
244
+ self, query: str, default: DefaultT, strict: bool = False
245
+ ) -> Node | DefaultT: ...
152
246
  @overload
153
247
  def css_first(
154
- self, query: str, default: None = None, strict: bool = False
248
+ self, query: str, default: None = None, strict: bool = False
155
249
  ) -> Node | None | DefaultT:
250
+ """Evaluate CSS selector against current node and its child nodes."""
156
251
  ...
157
252
  def decompose(self, recursive: bool = True) -> None:
158
- """Remove a Node from the tree."""
253
+ """Remove a Node from the tree.
254
+
255
+ Parameters
256
+ ----------
257
+ recursive : bool, default True
258
+ Whether to delete all its child nodes.
259
+
260
+ Examples
261
+ --------
262
+
263
+ >>> tree = HTMLParser(html)
264
+ >>> for tag in tree.css('script'):
265
+ >>> tag.decompose()
266
+ """
159
267
  ...
160
268
  def remove(self, recursive: bool = True) -> None:
161
269
  """An alias for the decompose method."""
162
270
  ...
163
- def unwrap(self) -> None:
164
- """Replace node with whatever is inside this node."""
271
+ def unwrap(self, delete_empty: bool = False) -> None:
272
+ """Replace node with whatever is inside this node.
273
+
274
+ Parameters
275
+ ----------
276
+ delete_empty : bool, default False
277
+ Whether to delete empty tags.
278
+
279
+ Examples
280
+ --------
281
+
282
+ >>> tree = HTMLParser("<div>Hello <i>world</i>!</div>")
283
+ >>> tree.css_first('i').unwrap()
284
+ >>> tree.html
285
+ '<html><head></head><body><div>Hello world!</div></body></html>'
286
+
287
+ Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
288
+ """
165
289
  ...
166
290
  def strip_tags(self, tags: list[str], recursive: bool = False) -> None:
167
- """Remove specified tags from the HTML tree."""
291
+ """Remove specified tags from the HTML tree.
292
+
293
+ Parameters
294
+ ----------
295
+ tags : list
296
+ List of tags to remove.
297
+ recursive : bool, default False
298
+ Whether to delete all its child nodes.
299
+
300
+ Examples
301
+ --------
302
+
303
+ >>> tree = HTMLParser('<html><head></head><body><script></script><div>Hello world!</div></body></html>')
304
+ >>> tags = ['head', 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes']
305
+ >>> tree.strip_tags(tags)
306
+ >>> tree.html
307
+ '<html><body><div>Hello world!</div></body></html>'
308
+ """
168
309
  ...
169
310
  def unwrap_tags(self, tags: list[str], delete_empty: bool = False) -> None:
170
311
  """Unwraps specified tags from the HTML tree.
171
312
 
172
- Works the same as the unwrap method, but applied to a list of tags."""
313
+ Works the same as the unwrap method, but applied to a list of tags.
314
+
315
+ Parameters
316
+ ----------
317
+ tags : list
318
+ List of tags to remove.
319
+ delete_empty : bool, default False
320
+ Whether to delete empty tags.
321
+
322
+ Examples
323
+ --------
324
+
325
+ >>> tree = HTMLParser('<div><a href="">Hello</a> <i>world</i>!</div>')
326
+ >>> tree.body.unwrap_tags(['i','a'])
327
+ >>> tree.body.html
328
+ '<body><div>Hello world!</div></body>'
329
+
330
+ Note: by default, empty tags are ignored, set "delete_empty" to "True" to change this.
331
+ """
173
332
  ...
174
333
  def replace_with(self, value: str | bytes | None) -> None:
175
- """Replace current Node with specified value."""
334
+ """Replace current Node with specified value.
335
+
336
+ Parameters
337
+ ----------
338
+ value : str, bytes or Node
339
+ The text or Node instance to replace the Node with.
340
+ When a text string is passed, it's treated as text. All HTML tags will be escaped.
341
+ Convert and pass the ``Node`` object when you want to work with HTML.
342
+ Does not clone the ``Node`` object.
343
+ All future changes to the passed ``Node`` object will also be taken into account.
344
+
345
+ Examples
346
+ --------
347
+
348
+ >>> tree = HTMLParser('<div>Get <img src="" alt="Laptop"></div>')
349
+ >>> img = tree.css_first('img')
350
+ >>> img.replace_with(img.attributes.get('alt', ''))
351
+ >>> tree.body.child.html
352
+ '<div>Get Laptop</div>'
353
+
354
+ >>> html_parser = HTMLParser('<div>Get <span alt="Laptop"><img src="/jpg"> <div></div></span></div>')
355
+ >>> html_parser2 = HTMLParser('<div>Test</div>')
356
+ >>> img_node = html_parser.css_first('img')
357
+ >>> img_node.replace_with(html_parser2.body.child)
358
+ '<div>Get <span alt="Laptop"><div>Test</div> <div></div></span></div>'
359
+ """
176
360
  ...
177
361
  def insert_before(self, value: str | bytes | None) -> None:
178
- """Insert a node before the current Node."""
362
+ """Insert a node before the current Node.
363
+
364
+ Parameters
365
+ ----------
366
+ value : str, bytes or Node
367
+ The text or Node instance to insert before the Node.
368
+ When a text string is passed, it's treated as text. All HTML tags will be escaped.
369
+ Convert and pass the ``Node`` object when you want to work with HTML.
370
+ Does not clone the ``Node`` object.
371
+ All future changes to the passed ``Node`` object will also be taken into account.
372
+
373
+ Examples
374
+ --------
375
+
376
+ >>> tree = HTMLParser('<div>Get <img src="" alt="Laptop"></div>')
377
+ >>> img = tree.css_first('img')
378
+ >>> img.insert_before(img.attributes.get('alt', ''))
379
+ >>> tree.body.child.html
380
+ '<div>Get Laptop<img src="" alt="Laptop"></div>'
381
+
382
+ >>> html_parser = HTMLParser('<div>Get <span alt="Laptop"><img src="/jpg"> <div></div></span></div>')
383
+ >>> html_parser2 = HTMLParser('<div>Test</div>')
384
+ >>> img_node = html_parser.css_first('img')
385
+ >>> img_node.insert_before(html_parser2.body.child)
386
+ '<div>Get <span alt="Laptop"><div>Test</div><img src="/jpg"> <div></div></span></div>'
387
+ """
179
388
  ...
180
389
  def insert_after(self, value: str | bytes | None) -> None:
181
- """Insert a node after the current Node."""
390
+ """Insert a node after the current Node.
391
+
392
+ Parameters
393
+ ----------
394
+ value : str, bytes or Node
395
+ The text or Node instance to insert after the Node.
396
+ When a text string is passed, it's treated as text. All HTML tags will be escaped.
397
+ Convert and pass the ``Node`` object when you want to work with HTML.
398
+ Does not clone the ``Node`` object.
399
+ All future changes to the passed ``Node`` object will also be taken into account.
400
+
401
+ Examples
402
+ --------
403
+
404
+ >>> tree = HTMLParser('<div>Get <img src="" alt="Laptop"></div>')
405
+ >>> img = tree.css_first('img')
406
+ >>> img.insert_after(img.attributes.get('alt', ''))
407
+ >>> tree.body.child.html
408
+ '<div>Get <img src="" alt="Laptop">Laptop</div>'
409
+
410
+ >>> html_parser = HTMLParser('<div>Get <span alt="Laptop"><img src="/jpg"> <div></div></span></div>')
411
+ >>> html_parser2 = HTMLParser('<div>Test</div>')
412
+ >>> img_node = html_parser.css_first('img')
413
+ >>> img_node.insert_after(html_parser2.body.child)
414
+ '<div>Get <span alt="Laptop"><img src="/jpg"><div>Test</div> <div></div></span></div>'
415
+ """
182
416
  ...
183
417
  def insert_child(self, value: str | bytes | None) -> None:
184
- """Insert a node inside (at the end of) the current Node.."""
418
+ """Insert a node inside (at the end of) the current Node.
419
+
420
+ Parameters
421
+ ----------
422
+ value : str, bytes or Node
423
+ The text or Node instance to insert inside the Node.
424
+ When a text string is passed, it's treated as text. All HTML tags will be escaped.
425
+ Convert and pass the ``Node`` object when you want to work with HTML.
426
+ Does not clone the ``Node`` object.
427
+ All future changes to the passed ``Node`` object will also be taken into account.
428
+
429
+ Examples
430
+ --------
431
+
432
+ >>> tree = HTMLParser('<div>Get <img src=""></div>')
433
+ >>> div = tree.css_first('div')
434
+ >>> div.insert_child('Laptop')
435
+ >>> tree.body.child.html
436
+ '<div>Get <img src="">Laptop</div>'
437
+
438
+ >>> html_parser = HTMLParser('<div>Get <span alt="Laptop"> <div>Laptop</div> </span></div>')
439
+ >>> html_parser2 = HTMLParser('<div>Test</div>')
440
+ >>> span_node = html_parser.css_first('span')
441
+ >>> span_node.insert_child(html_parser2.body.child)
442
+ '<div>Get <span alt="Laptop"> <div>Laptop</div> <div>Test</div> </span></div>'
443
+ """
185
444
  ...
186
445
  @property
187
446
  def raw_value(self) -> bytes:
188
447
  """Return the raw (unparsed, original) value of a node.
189
448
 
190
- Currently, works on text nodes only."""
449
+ Currently, works on text nodes only.
450
+
451
+ Returns
452
+ -------
453
+
454
+ raw_value : bytes
455
+
456
+ Examples
457
+ --------
458
+
459
+ >>> html_parser = HTMLParser('<div>&#x3C;test&#x3E;</div>')
460
+ >>> selector = html_parser.css_first('div')
461
+ >>> selector.child.html
462
+ '&lt;test&gt;'
463
+ >>> selector.child.raw_value
464
+ b'&#x3C;test&#x3E;'
465
+ """
191
466
  ...
192
467
  def select(self, query: str | None = None) -> Selector:
193
468
  """Select nodes given a CSS selector.
194
469
 
195
470
  Works similarly to the css method, but supports chained filtering and extra features.
471
+
472
+ Parameters
473
+ ----------
474
+ query : str or None
475
+ The CSS selector to use when searching for nodes.
476
+
477
+ Returns
478
+ -------
479
+ selector : The `Selector` class.
196
480
  """
197
481
  ...
198
482
  def scripts_contain(self, query: str) -> bool:
199
483
  """Returns True if any of the script tags contain specified text.
200
484
 
201
- Caches script tags on the first call to improve performance."""
485
+ Caches script tags on the first call to improve performance.
486
+
487
+ Parameters
488
+ ----------
489
+ query : str
490
+ The query to check.
491
+ """
202
492
  ...
203
493
  def script_srcs_contain(self, queries: tuple[str]) -> bool:
204
494
  """Returns True if any of the script SRCs attributes contain on of the specified text.
205
495
 
206
- Caches values on the first call to improve performance."""
496
+ Caches values on the first call to improve performance.
497
+
498
+ Parameters
499
+ ----------
500
+ queries : tuple of str
501
+ """
207
502
  ...
208
503
  @property
209
504
  def text_content(self) -> str | None:
210
505
  """Returns the text of the node if it is a text node.
211
506
 
212
- Returns None for other nodes. Unlike the text method, does not include child nodes.
507
+ Returns None for other nodes.
508
+ Unlike the ``text`` method, does not include child nodes.
509
+
510
+ Returns
511
+ -------
512
+ text : str or None.
213
513
  """
214
514
  ...
215
515
  def merge_text_nodes(self):
216
516
  """Iterates over all text nodes and merges all text nodes that are close to each other.
217
517
 
218
- This is useful for text extraction."""
518
+ This is useful for text extraction.
519
+ Use it when you need to strip HTML tags and merge "dangling" text.
520
+
521
+ Examples
522
+ --------
523
+
524
+ >>> tree = HTMLParser("<div><p><strong>J</strong>ohn</p><p>Doe</p></div>")
525
+ >>> node = tree.css_first('div')
526
+ >>> tree.unwrap_tags(["strong"])
527
+ >>> tree.text(deep=True, separator=" ", strip=True)
528
+ "J ohn Doe" # Text extraction produces an extra space because the strong tag was removed.
529
+ >>> node.merge_text_nodes()
530
+ >>> tree.text(deep=True, separator=" ", strip=True)
531
+ "John Doe"
532
+ """
219
533
  ...
220
534
 
221
535
  class HTMLParser:
536
+ """The HTML parser.
537
+
538
+ Use this class to parse raw HTML.
539
+
540
+ Parameters
541
+ ----------
542
+
543
+ html : str (unicode) or bytes
544
+ detect_encoding : bool, default True
545
+ If `True` and html type is `bytes` then encoding will be detected automatically.
546
+ use_meta_tags : bool, default True
547
+ Whether to use meta tags in encoding detection process.
548
+ decode_errors : str, default 'ignore'
549
+ Same as in builtin's str.decode, i.e 'strict', 'ignore' or 'replace'.
550
+ """
551
+
222
552
  def __init__(
223
553
  self,
224
554
  html: bytes | str,
@@ -229,24 +559,50 @@ class HTMLParser:
229
559
  def css(self, query: str) -> list[Node]:
230
560
  """A CSS selector.
231
561
 
232
- Matches pattern query against HTML tree."""
562
+ Matches pattern `query` against HTML tree.
563
+ `CSS selectors reference <https://www.w3schools.com/cssref/css_selectors.asp>`_.
564
+
565
+ Parameters
566
+ ----------
567
+ query : str
568
+ CSS selector (e.g. "div > :nth-child(2n+1):not(:has(a))").
569
+
570
+ Returns
571
+ -------
572
+ selector : list of `Node` objects
573
+ """
233
574
  ...
234
575
  @overload
235
576
  def css_first(
236
577
  self, query: str, default: DefaultT, strict: bool = False
237
- ) -> Node | DefaultT:
238
- ...
239
-
578
+ ) -> Node | DefaultT: ...
240
579
  @overload
241
580
  def css_first(
242
- self, query: str, default: None = None, strict: bool = False
581
+ self, query: str, default: None = None, strict: bool = False
243
582
  ) -> Node | None | DefaultT:
583
+ """Same as `css` but returns only the first match.
584
+
585
+ Parameters
586
+ ----------
587
+
588
+ query : str
589
+ default : Any, default None
590
+ Default value to return if there is no match.
591
+ strict : bool, default False
592
+ Set to True if you want to check if there is strictly only one match in the document.
593
+
594
+
595
+ Returns
596
+ -------
597
+ selector : `Node` object
598
+ """
244
599
  ...
245
600
  @property
246
601
  def input_encoding(self) -> str:
247
602
  """Return encoding of the HTML document.
248
603
 
249
- Returns unknown in case the encoding is not determined."""
604
+ Returns `unknown` in case the encoding is not determined.
605
+ """
250
606
  ...
251
607
  @property
252
608
  def root(self) -> Node | None:
@@ -261,16 +617,70 @@ class HTMLParser:
261
617
  """Returns document body."""
262
618
  ...
263
619
  def tags(self, name: str) -> list[Node]:
264
- """Returns a list of tags that match specified name."""
620
+ """Returns a list of tags that match specified name.
621
+
622
+ Parameters
623
+ ----------
624
+ name : str (e.g. div)
625
+ """
265
626
  ...
266
627
  def text(self, deep: bool = True, separator: str = "", strip: bool = False) -> str:
267
- """Returns the text of the node including text of all its child nodes."""
628
+ """Returns the text of the node including text of all its child nodes.
629
+
630
+ Parameters
631
+ ----------
632
+ strip : bool, default False
633
+ If true, calls ``str.strip()`` on each text part to remove extra white spaces.
634
+ separator : str, default ''
635
+ The separator to use when joining text from different nodes.
636
+ deep : bool, default True
637
+ If True, includes text from all child nodes.
638
+
639
+ Returns
640
+ -------
641
+ text : str
642
+ """
643
+ ...
644
+ def strip_tags(self, tags: list[str], recursive: bool = False) -> None:
645
+ """Remove specified tags from the node.
646
+
647
+ Parameters
648
+ ----------
649
+ tags : list of str
650
+ List of tags to remove.
651
+ recursive : bool, default False
652
+ Whether to delete all its child nodes.
653
+
654
+ Examples
655
+ --------
656
+
657
+ >>> tree = HTMLParser('<html><head></head><body><script></script><div>Hello world!</div></body></html>')
658
+ >>> tags = ['head', 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes']
659
+ >>> tree.strip_tags(tags)
660
+ >>> tree.html
661
+ '<html><body><div>Hello world!</div></body></html>'
662
+ """
268
663
  ...
269
- def strip_tags(self, tags: list[str], recursive: bool = False) -> None: ...
270
664
  def unwrap_tags(self, tags: list[str], delete_empty: bool = False) -> None:
271
665
  """Unwraps specified tags from the HTML tree.
272
666
 
273
- Works the same as th unwrap method, but applied to a list of tags."""
667
+ Works the same as the unwrap method, but applied to a list of tags.
668
+
669
+ Parameters
670
+ ----------
671
+ tags : list
672
+ List of tags to remove.
673
+ delete_empty : bool, default False
674
+ If True, removes empty tags.
675
+
676
+ Examples
677
+ --------
678
+
679
+ >>> tree = HTMLParser('<div><a href="">Hello</a> <i>world</i>!</div>')
680
+ >>> tree.body.unwrap_tags(['i','a'])
681
+ >>> tree.body.html
682
+ '<body><div>Hello world!</div></body>'
683
+ """
274
684
  ...
275
685
  @property
276
686
  def html(self) -> str | None:
@@ -279,7 +689,16 @@ class HTMLParser:
279
689
  def select(self, query: str | None = None) -> Selector | None:
280
690
  """Select nodes given a CSS selector.
281
691
 
282
- Works similarly to the css method, but supports chained filtering and extra features.
692
+ Works similarly to the ``css`` method, but supports chained filtering and extra features.
693
+
694
+ Parameters
695
+ ----------
696
+ query : str or None
697
+ The CSS selector to use when searching for nodes.
698
+
699
+ Returns
700
+ -------
701
+ selector : The `Selector` class.
283
702
  """
284
703
  ...
285
704
  def any_css_matches(self, selectors: tuple[str]) -> bool:
@@ -288,12 +707,23 @@ class HTMLParser:
288
707
  def scripts_contain(self, query: str) -> bool:
289
708
  """Returns True if any of the script tags contain specified text.
290
709
 
291
- Caches script tags on the first call to improve performance."""
710
+ Caches script tags on the first call to improve performance.
711
+
712
+ Parameters
713
+ ----------
714
+ query : str
715
+ The query to check.
716
+ """
292
717
  ...
293
718
  def scripts_srcs_contain(self, queries: tuple[str]) -> bool:
294
719
  """Returns True if any of the script SRCs attributes contain on of the specified text.
295
720
 
296
- Caches values on the first call to improve performance."""
721
+ Caches values on the first call to improve performance.
722
+
723
+ Parameters
724
+ ----------
725
+ queries : tuple of str
726
+ """
297
727
  ...
298
728
  def css_matches(self, selector: str) -> bool: ...
299
729
  def clone(self) -> HTMLParser:
@@ -302,7 +732,21 @@ class HTMLParser:
302
732
  def merge_text_nodes(self):
303
733
  """Iterates over all text nodes and merges all text nodes that are close to each other.
304
734
 
305
- This is useful for text extraction."""
735
+ This is useful for text extraction.
736
+ Use it when you need to strip HTML tags and merge "dangling" text.
737
+
738
+ Examples
739
+ --------
740
+
741
+ >>> tree = HTMLParser("<div><p><strong>J</strong>ohn</p><p>Doe</p></div>")
742
+ >>> node = tree.css_first('div')
743
+ >>> tree.unwrap_tags(["strong"])
744
+ >>> tree.text(deep=True, separator=" ", strip=True)
745
+ "J ohn Doe" # Text extraction produces an extra space because the strong tag was removed.
746
+ >>> node.merge_text_nodes()
747
+ >>> tree.text(deep=True, separator=" ", strip=True)
748
+ "John Doe"
749
+ """
306
750
  ...
307
751
 
308
752
  def create_tag(tag: str) -> Node: