html-to-markdown 1.12.0.tar.gz → 1.12.1.tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of html-to-markdown might be problematic. Click here for more details.

Files changed (22)
  1. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/PKG-INFO +45 -1
  2. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/README.md +44 -0
  3. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/html_to_markdown/converters.py +4 -13
  4. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/html_to_markdown/processing.py +27 -14
  5. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/html_to_markdown/whitespace.py +17 -3
  6. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/html_to_markdown.egg-info/PKG-INFO +45 -1
  7. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/pyproject.toml +11 -1
  8. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/LICENSE +0 -0
  9. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/html_to_markdown/__init__.py +0 -0
  10. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/html_to_markdown/__main__.py +0 -0
  11. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/html_to_markdown/cli.py +0 -0
  12. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/html_to_markdown/constants.py +0 -0
  13. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/html_to_markdown/exceptions.py +0 -0
  14. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/html_to_markdown/preprocessor.py +0 -0
  15. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/html_to_markdown/py.typed +0 -0
  16. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/html_to_markdown/utils.py +0 -0
  17. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/html_to_markdown.egg-info/SOURCES.txt +0 -0
  18. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/html_to_markdown.egg-info/dependency_links.txt +0 -0
  19. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/html_to_markdown.egg-info/entry_points.txt +0 -0
  20. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/html_to_markdown.egg-info/requires.txt +0 -0
  21. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/html_to_markdown.egg-info/top_level.txt +0 -0
  22. {html_to_markdown-1.12.0 → html_to_markdown-1.12.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: html-to-markdown
3
- Version: 1.12.0
3
+ Version: 1.12.1
4
4
  Summary: A modern, type-safe Python library for converting HTML to Markdown with comprehensive tag support and customizable options
5
5
  Author-email: Na'aman Hirschfeld <nhirschfeld@gmail.com>
6
6
  License: MIT
@@ -348,6 +348,50 @@ def show_progress(processed: int, total: int):
348
348
  markdown = convert_to_markdown(html_content, stream_processing=True, chunk_size=4096, progress_callback=show_progress)
349
349
  ```
350
350
 
351
+ #### When to Use Streaming vs Regular Processing
352
+
353
+ Based on comprehensive performance analysis, here are our recommendations:
354
+
355
+ **📄 Use Regular Processing When:**
356
+
357
+ - Files < 100KB (simplicity preferred)
358
+ - Simple scripts and one-off conversions
359
+ - Memory is not a concern
360
+ - You want the simplest API
361
+
362
+ **🌊 Use Streaming Processing When:**
363
+
364
+ - Files > 100KB (memory efficiency)
365
+ - Processing many files in batch
366
+ - Memory is constrained
367
+ - You need progress reporting
368
+ - You want to process results incrementally
369
+ - Running in production environments
370
+
371
+ **📋 Specific Recommendations by File Size:**
372
+
373
+ | File Size | Recommendation | Reason |
374
+ | ---------- | ----------------------------------------------- | -------------------------------------- |
375
+ | < 50KB | Regular (simplicity) or Streaming (3-5% faster) | Either works well |
376
+ | 50KB-100KB | Either (streaming slightly preferred) | Minimal difference |
377
+ | 100KB-1MB | Streaming preferred | Better performance + memory efficiency |
378
+ | > 1MB | Streaming strongly recommended | Significant memory advantages |
379
+
380
+ **🔧 Configuration Recommendations:**
381
+
382
+ - **Default chunk_size: 2048 bytes** (optimal performance balance)
383
+ - **For very large files (>10MB)**: Consider `chunk_size=4096`
384
+ - **For memory-constrained environments**: Use smaller chunks `chunk_size=1024`
385
+
386
+ **📈 Performance Benefits:**
387
+
388
+ Streaming provides consistent **3-5% performance improvement** across all file sizes:
389
+
390
+ - **Streaming throughput**: ~0.47-0.48 MB/s
391
+ - **Regular throughput**: ~0.44-0.47 MB/s
392
+ - **Memory usage**: Streaming uses less peak memory for large files
393
+ - **Latency**: Streaming allows processing results before completion
394
+
351
395
  ### Preprocessing API
352
396
 
353
397
  The library provides functions for preprocessing HTML before conversion, useful for cleaning messy or complex HTML:
@@ -310,6 +310,50 @@ def show_progress(processed: int, total: int):
310
310
  markdown = convert_to_markdown(html_content, stream_processing=True, chunk_size=4096, progress_callback=show_progress)
311
311
  ```
312
312
 
313
+ #### When to Use Streaming vs Regular Processing
314
+
315
+ Based on comprehensive performance analysis, here are our recommendations:
316
+
317
+ **📄 Use Regular Processing When:**
318
+
319
+ - Files < 100KB (simplicity preferred)
320
+ - Simple scripts and one-off conversions
321
+ - Memory is not a concern
322
+ - You want the simplest API
323
+
324
+ **🌊 Use Streaming Processing When:**
325
+
326
+ - Files > 100KB (memory efficiency)
327
+ - Processing many files in batch
328
+ - Memory is constrained
329
+ - You need progress reporting
330
+ - You want to process results incrementally
331
+ - Running in production environments
332
+
333
+ **📋 Specific Recommendations by File Size:**
334
+
335
+ | File Size | Recommendation | Reason |
336
+ | ---------- | ----------------------------------------------- | -------------------------------------- |
337
+ | < 50KB | Regular (simplicity) or Streaming (3-5% faster) | Either works well |
338
+ | 50KB-100KB | Either (streaming slightly preferred) | Minimal difference |
339
+ | 100KB-1MB | Streaming preferred | Better performance + memory efficiency |
340
+ | > 1MB | Streaming strongly recommended | Significant memory advantages |
341
+
342
+ **🔧 Configuration Recommendations:**
343
+
344
+ - **Default chunk_size: 2048 bytes** (optimal performance balance)
345
+ - **For very large files (>10MB)**: Consider `chunk_size=4096`
346
+ - **For memory-constrained environments**: Use smaller chunks `chunk_size=1024`
347
+
348
+ **📈 Performance Benefits:**
349
+
350
+ Streaming provides consistent **3-5% performance improvement** across all file sizes:
351
+
352
+ - **Streaming throughput**: ~0.47-0.48 MB/s
353
+ - **Regular throughput**: ~0.44-0.47 MB/s
354
+ - **Memory usage**: Streaming uses less peak memory for large files
355
+ - **Latency**: Streaming allows processing results before completion
356
+
313
357
  ### Preprocessing API
314
358
 
315
359
  The library provides functions for preprocessing HTML before conversion, useful for cleaning messy or complex HTML:
@@ -39,7 +39,6 @@ def _format_wrapped_block(text: str, start_marker: str, end_marker: str = "") ->
39
39
 
40
40
 
41
41
  def _find_list_item_ancestor(tag: Tag) -> Tag | None:
42
- """Find the nearest list item ancestor of a tag."""
43
42
  parent = tag.parent
44
43
  while parent and parent.name != "li":
45
44
  parent = parent.parent
@@ -231,14 +230,15 @@ def _convert_blockquote(*, text: str, tag: Tag, convert_as_inline: bool, list_in
231
230
  return quote_text
232
231
 
233
232
 
234
- def _convert_br(*, convert_as_inline: bool, newline_style: str, tag: Tag) -> str:
233
+ def _convert_br(*, convert_as_inline: bool, newline_style: str, tag: Tag, text: str) -> str:
235
234
  from html_to_markdown.processing import _has_ancestor # noqa: PLC0415
236
235
 
237
236
  if _has_ancestor(tag, ["h1", "h2", "h3", "h4", "h5", "h6"]):
238
- return " "
237
+ return " " + text.strip()
239
238
 
240
239
  _ = convert_as_inline
241
- return "\\\n" if newline_style.lower() == BACKSLASH else " \n"
240
+ newline = "\\\n" if newline_style.lower() == BACKSLASH else " \n"
241
+ return newline + text.strip() if text.strip() else newline
242
242
 
243
243
 
244
244
  def _convert_hn(
@@ -286,7 +286,6 @@ def _convert_img(*, tag: Tag, convert_as_inline: bool, keep_inline_images_in: It
286
286
 
287
287
 
288
288
  def _has_block_list_items(tag: Tag) -> bool:
289
- """Check if any list items contain block elements."""
290
289
  return any(
291
290
  any(child.name in BLOCK_ELEMENTS for child in li.children if hasattr(child, "name"))
292
291
  for li in tag.find_all("li", recursive=False)
@@ -294,7 +293,6 @@ def _has_block_list_items(tag: Tag) -> bool:
294
293
 
295
294
 
296
295
  def _handle_nested_list_indentation(text: str, list_indent_str: str, parent: Tag) -> str:
297
- """Handle indentation for lists nested within list items."""
298
296
  prev_p = None
299
297
  for child in parent.children:
300
298
  if hasattr(child, "name"):
@@ -310,7 +308,6 @@ def _handle_nested_list_indentation(text: str, list_indent_str: str, parent: Tag
310
308
 
311
309
 
312
310
  def _handle_direct_nested_list_indentation(text: str, list_indent_str: str) -> str:
313
- """Handle indentation for lists that are direct children of other lists."""
314
311
  lines = text.strip().split("\n")
315
312
  indented_lines = [f"{list_indent_str}{line}" if line.strip() else "" for line in lines]
316
313
  result = "\n".join(indented_lines)
@@ -318,7 +315,6 @@ def _handle_direct_nested_list_indentation(text: str, list_indent_str: str) -> s
318
315
 
319
316
 
320
317
  def _add_list_item_spacing(text: str) -> str:
321
- """Add extra spacing between list items that contain block content."""
322
318
  lines = text.split("\n")
323
319
  items_with_blocks = set()
324
320
 
@@ -482,7 +478,6 @@ def _convert_pre(
482
478
 
483
479
 
484
480
  def _process_table_cell_content(*, tag: Tag, text: str, br_in_tables: bool) -> str:
485
- """Process table cell content, optionally using <br> tags for multi-line content."""
486
481
  if br_in_tables:
487
482
  block_children = [child for child in tag.children if hasattr(child, "name") and child.name in BLOCK_ELEMENTS]
488
483
 
@@ -510,7 +505,6 @@ def _convert_th(*, tag: Tag, text: str, br_in_tables: bool = False) -> str:
510
505
 
511
506
 
512
507
  def _get_rowspan_positions(prev_cells: list[Tag]) -> tuple[list[int], int]:
513
- """Get positions of cells with rowspan > 1 from previous row."""
514
508
  rowspan_positions = []
515
509
  col_pos = 0
516
510
 
@@ -531,7 +525,6 @@ def _get_rowspan_positions(prev_cells: list[Tag]) -> tuple[list[int], int]:
531
525
 
532
526
 
533
527
  def _handle_rowspan_text(text: str, rowspan_positions: list[int], col_pos: int) -> str:
534
- """Handle text adjustment for rows with rowspan cells."""
535
528
  converted_cells = [part.rstrip() + " |" for part in text.split("|")[:-1] if part] if text.strip() else []
536
529
  rowspan_set = set(rowspan_positions)
537
530
 
@@ -542,7 +535,6 @@ def _handle_rowspan_text(text: str, rowspan_positions: list[int], col_pos: int)
542
535
 
543
536
 
544
537
  def _is_header_row(tag: Tag, cells: list[Tag], parent_name: str, tag_grand_parent: Tag | None) -> bool:
545
- """Determine if this table row should be treated as a header row."""
546
538
  return (
547
539
  all(hasattr(cell, "name") and cell.name == "th" for cell in cells)
548
540
  or (not tag.previous_sibling and parent_name != "tbody")
@@ -555,7 +547,6 @@ def _is_header_row(tag: Tag, cells: list[Tag], parent_name: str, tag_grand_paren
555
547
 
556
548
 
557
549
  def _calculate_total_colspan(cells: list[Tag]) -> int:
558
- """Calculate total colspan for all cells in a row."""
559
550
  full_colspan = 0
560
551
  for cell in cells:
561
552
  if hasattr(cell, "attrs") and "colspan" in cell.attrs:
@@ -11,7 +11,7 @@ from io import StringIO
11
11
  from itertools import chain
12
12
  from typing import TYPE_CHECKING, Any, Literal, cast
13
13
 
14
- from bs4 import BeautifulSoup, Comment, Doctype, Tag
14
+ from bs4 import BeautifulSoup, CData, Comment, Doctype, Tag
15
15
  from bs4.element import NavigableString, PageElement
16
16
 
17
17
  try:
@@ -179,6 +179,7 @@ def _process_tag(
179
179
  strip: set[str] | None,
180
180
  whitespace_handler: WhitespaceHandler,
181
181
  context_before: str = "",
182
+ ancestor_names: set[str] | None = None,
182
183
  ) -> str:
183
184
  should_convert_tag = _should_convert_tag(tag_name=tag.name, strip=strip, convert=convert)
184
185
  tag_name: SupportedTag | None = (
@@ -186,6 +187,17 @@ def _process_tag(
186
187
  )
187
188
  text_parts: list[str] = []
188
189
 
190
+ if ancestor_names is None:
191
+ ancestor_names = set()
192
+ current: Tag | None = tag
193
+ while current and hasattr(current, "name"):
194
+ if current.name:
195
+ ancestor_names.add(current.name)
196
+ current = getattr(current, "parent", None)
197
+
198
+ if len(ancestor_names) > 10:
199
+ break
200
+
189
201
  is_heading = html_heading_re.match(tag.name) is not None
190
202
  is_cell = tag_name in {"td", "th"}
191
203
  convert_children_as_inline = convert_as_inline or is_heading or is_cell
@@ -201,7 +213,7 @@ def _process_tag(
201
213
  if can_extract and isinstance(el, NavigableString) and not el.strip():
202
214
  el.extract()
203
215
 
204
- children = list(filter(lambda value: not isinstance(value, (Comment, Doctype)), tag.children))
216
+ children = list(filter(lambda value: not isinstance(value, (Comment, Doctype, CData)), tag.children))
205
217
 
206
218
  empty_when_no_content_tags = {"abbr", "var", "ins", "dfn", "time", "data", "cite", "q", "mark", "small", "u"}
207
219
 
@@ -227,6 +239,7 @@ def _process_tag(
227
239
  escape_asterisks=escape_asterisks,
228
240
  escape_underscores=escape_underscores,
229
241
  whitespace_handler=whitespace_handler,
242
+ ancestor_names=ancestor_names,
230
243
  )
231
244
  )
232
245
  elif isinstance(el, Tag):
@@ -243,6 +256,7 @@ def _process_tag(
243
256
  strip=strip,
244
257
  whitespace_handler=whitespace_handler,
245
258
  context_before=(context_before + current_text)[-2:],
259
+ ancestor_names=ancestor_names,
246
260
  )
247
261
  )
248
262
 
@@ -282,21 +296,23 @@ def _process_text(
282
296
  escape_asterisks: bool,
283
297
  escape_underscores: bool,
284
298
  whitespace_handler: WhitespaceHandler,
299
+ ancestor_names: set[str] | None = None,
285
300
  ) -> str:
286
301
  text = str(el) or ""
287
302
 
288
303
  parent = el.parent
289
304
  parent_name = parent.name if parent else None
290
305
 
291
- ancestor_names = set()
292
- current = parent
293
- while current and hasattr(current, "name"):
294
- if current.name:
295
- ancestor_names.add(current.name)
296
- current = getattr(current, "parent", None)
306
+ if ancestor_names is None:
307
+ ancestor_names = set()
308
+ current = parent
309
+ while current and hasattr(current, "name"):
310
+ if current.name:
311
+ ancestor_names.add(current.name)
312
+ current = getattr(current, "parent", None)
297
313
 
298
- if len(ancestor_names) > 10:
299
- break
314
+ if len(ancestor_names) > 10:
315
+ break
300
316
 
301
317
  in_pre = bool(ancestor_names.intersection({"pre"}))
302
318
 
@@ -469,7 +485,6 @@ def convert_to_markdown(
469
485
  wrap_width: int = 80,
470
486
  ) -> str:
471
487
  """Convert HTML content to Markdown format.
472
-
473
488
  This is the main entry point for converting HTML to Markdown. It supports
474
489
  various customization options for controlling the conversion behavior.
475
490
 
@@ -525,11 +540,9 @@ def convert_to_markdown(
525
540
  >>> html = "<h1>Title</h1><p>Content</p>"
526
541
  >>> convert_to_markdown(html)
527
542
  'Title\\n=====\\n\\nContent\\n\\n'
528
-
529
543
  With custom options:
530
544
  >>> convert_to_markdown(html, heading_style="atx", list_indent_width=2)
531
545
  '# Title\\n\\nContent\\n\\n'
532
-
533
546
  Discord-compatible lists (2-space indent):
534
547
  >>> html = "<ul><li>Item 1</li><li>Item 2</li></ul>"
535
548
  >>> convert_to_markdown(html, list_indent_width=2)
@@ -896,7 +909,7 @@ def _process_html_core(
896
909
  elements_to_process = body.children if body and isinstance(body, Tag) else source.children
897
910
 
898
911
  context = ""
899
- for el in filter(lambda value: not isinstance(value, (Comment, Doctype)), elements_to_process):
912
+ for el in filter(lambda value: not isinstance(value, (Comment, Doctype, CData)), elements_to_process):
900
913
  if isinstance(el, NavigableString):
901
914
  text = _process_text(
902
915
  el=el,
@@ -6,8 +6,10 @@ import re
6
6
  import unicodedata
7
7
  from typing import TYPE_CHECKING, Literal
8
8
 
9
+ from bs4.element import NavigableString
10
+
9
11
  if TYPE_CHECKING:
10
- from bs4 import NavigableString, PageElement
12
+ from bs4 import PageElement
11
13
 
12
14
 
13
15
  WhitespaceMode = Literal["normalized", "strict"]
@@ -128,6 +130,8 @@ class WhitespaceHandler:
128
130
  def normalize_unicode_spaces(self, text: str) -> str:
129
131
  text = self._unicode_spaces.sub(" ", text)
130
132
 
133
+ text = text.replace("\r\n", "\n")
134
+
131
135
  normalized = []
132
136
  for char in text:
133
137
  if unicodedata.category(char) in ("Zs", "Zl", "Zp"):
@@ -250,12 +254,22 @@ class WhitespaceHandler:
250
254
  has_leading = (
251
255
  has_lead_space
252
256
  and original[0] == " "
253
- and (self.is_inline_element(prev_sibling) or self.is_block_element(prev_sibling) or prev_sibling is None)
257
+ and (
258
+ self.is_inline_element(prev_sibling)
259
+ or self.is_block_element(prev_sibling)
260
+ or prev_sibling is None
261
+ or isinstance(prev_sibling, NavigableString)
262
+ )
254
263
  )
255
264
  has_trailing = (
256
265
  has_trail_space
257
266
  and original[-1] == " "
258
- and (self.is_inline_element(next_sibling) or self.is_block_element(next_sibling) or next_sibling is None)
267
+ and (
268
+ self.is_inline_element(next_sibling)
269
+ or self.is_block_element(next_sibling)
270
+ or next_sibling is None
271
+ or isinstance(next_sibling, NavigableString)
272
+ )
259
273
  )
260
274
 
261
275
  if original and original[0] in "\n\t" and self.is_inline_element(prev_sibling):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: html-to-markdown
3
- Version: 1.12.0
3
+ Version: 1.12.1
4
4
  Summary: A modern, type-safe Python library for converting HTML to Markdown with comprehensive tag support and customizable options
5
5
  Author-email: Na'aman Hirschfeld <nhirschfeld@gmail.com>
6
6
  License: MIT
@@ -348,6 +348,50 @@ def show_progress(processed: int, total: int):
348
348
  markdown = convert_to_markdown(html_content, stream_processing=True, chunk_size=4096, progress_callback=show_progress)
349
349
  ```
350
350
 
351
+ #### When to Use Streaming vs Regular Processing
352
+
353
+ Based on comprehensive performance analysis, here are our recommendations:
354
+
355
+ **📄 Use Regular Processing When:**
356
+
357
+ - Files < 100KB (simplicity preferred)
358
+ - Simple scripts and one-off conversions
359
+ - Memory is not a concern
360
+ - You want the simplest API
361
+
362
+ **🌊 Use Streaming Processing When:**
363
+
364
+ - Files > 100KB (memory efficiency)
365
+ - Processing many files in batch
366
+ - Memory is constrained
367
+ - You need progress reporting
368
+ - You want to process results incrementally
369
+ - Running in production environments
370
+
371
+ **📋 Specific Recommendations by File Size:**
372
+
373
+ | File Size | Recommendation | Reason |
374
+ | ---------- | ----------------------------------------------- | -------------------------------------- |
375
+ | < 50KB | Regular (simplicity) or Streaming (3-5% faster) | Either works well |
376
+ | 50KB-100KB | Either (streaming slightly preferred) | Minimal difference |
377
+ | 100KB-1MB | Streaming preferred | Better performance + memory efficiency |
378
+ | > 1MB | Streaming strongly recommended | Significant memory advantages |
379
+
380
+ **🔧 Configuration Recommendations:**
381
+
382
+ - **Default chunk_size: 2048 bytes** (optimal performance balance)
383
+ - **For very large files (>10MB)**: Consider `chunk_size=4096`
384
+ - **For memory-constrained environments**: Use smaller chunks `chunk_size=1024`
385
+
386
+ **📈 Performance Benefits:**
387
+
388
+ Streaming provides consistent **3-5% performance improvement** across all file sizes:
389
+
390
+ - **Streaming throughput**: ~0.47-0.48 MB/s
391
+ - **Regular throughput**: ~0.44-0.47 MB/s
392
+ - **Memory usage**: Streaming uses less peak memory for large files
393
+ - **Latency**: Streaming allows processing results before completion
394
+
351
395
  ### Preprocessing API
352
396
 
353
397
  The library provides functions for preprocessing HTML before conversion, useful for cleaning messy or complex HTML:
@@ -5,7 +5,7 @@ requires = [ "setuptools>=78.1" ]
5
5
 
6
6
  [project]
7
7
  name = "html-to-markdown"
8
- version = "1.12.0"
8
+ version = "1.12.1"
9
9
  description = "A modern, type-safe Python library for converting HTML to Markdown with comprehensive tag support and customizable options"
10
10
  readme = "README.md"
11
11
  keywords = [
@@ -58,6 +58,7 @@ dev = [
58
58
  "mypy>=1.18.1",
59
59
  "pre-commit>=4.3",
60
60
  "pytest>=8.4.2",
61
+ "pytest-benchmark>=5.1",
61
62
  "pytest-cov>=7",
62
63
  "pytest-mock>=3.15",
63
64
  "ruff>=0.13",
@@ -114,6 +115,7 @@ lint.per-file-ignores."performance_test.py" = [
114
115
  "UP035",
115
116
  ]
116
117
  lint.per-file-ignores."tests/**/*.*" = [ "ARG", "D", "PD", "PT006", "PT013", "S" ]
118
+ lint.per-file-ignores."tests/benchmark_*_test.py" = [ "T201" ]
117
119
  lint.isort.known-first-party = [ "html_to_markdown", "tests" ]
118
120
  lint.pydocstyle.convention = "google"
119
121
 
@@ -152,6 +154,14 @@ module = "tests.*"
152
154
  disallow_any_generics = false
153
155
  disallow_untyped_decorators = false
154
156
 
157
+ [[tool.mypy.overrides]]
158
+ module = "tests.benchmark_performance_test"
159
+ ignore_missing_imports = true
160
+
161
+ [[tool.mypy.overrides]]
162
+ module = "tests.benchmark_memory_test"
163
+ ignore_missing_imports = true
164
+
155
165
  [tool.uv]
156
166
  default-groups = [ "dev" ]
157
167