natural-pdf 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. docs/api/index.md +386 -0
  2. docs/assets/favicon.png +3 -0
  3. docs/assets/favicon.svg +3 -0
  4. docs/assets/javascripts/custom.js +17 -0
  5. docs/assets/logo.svg +3 -0
  6. docs/assets/sample-screen.png +0 -0
  7. docs/assets/social-preview.png +17 -0
  8. docs/assets/social-preview.svg +17 -0
  9. docs/assets/stylesheets/custom.css +65 -0
  10. docs/document-qa/index.ipynb +435 -0
  11. docs/document-qa/index.md +79 -0
  12. docs/element-selection/index.ipynb +915 -0
  13. docs/element-selection/index.md +229 -0
  14. docs/index.md +170 -0
  15. docs/installation/index.md +69 -0
  16. docs/interactive-widget/index.ipynb +962 -0
  17. docs/interactive-widget/index.md +12 -0
  18. docs/layout-analysis/index.ipynb +818 -0
  19. docs/layout-analysis/index.md +185 -0
  20. docs/ocr/index.md +209 -0
  21. docs/pdf-navigation/index.ipynb +314 -0
  22. docs/pdf-navigation/index.md +97 -0
  23. docs/regions/index.ipynb +816 -0
  24. docs/regions/index.md +294 -0
  25. docs/tables/index.ipynb +658 -0
  26. docs/tables/index.md +144 -0
  27. docs/text-analysis/index.ipynb +370 -0
  28. docs/text-analysis/index.md +105 -0
  29. docs/text-extraction/index.ipynb +1478 -0
  30. docs/text-extraction/index.md +292 -0
  31. docs/tutorials/01-loading-and-extraction.ipynb +1710 -0
  32. docs/tutorials/01-loading-and-extraction.md +95 -0
  33. docs/tutorials/02-finding-elements.ipynb +340 -0
  34. docs/tutorials/02-finding-elements.md +149 -0
  35. docs/tutorials/03-extracting-blocks.ipynb +147 -0
  36. docs/tutorials/03-extracting-blocks.md +48 -0
  37. docs/tutorials/04-table-extraction.ipynb +114 -0
  38. docs/tutorials/04-table-extraction.md +50 -0
  39. docs/tutorials/05-excluding-content.ipynb +270 -0
  40. docs/tutorials/05-excluding-content.md +109 -0
  41. docs/tutorials/06-document-qa.ipynb +332 -0
  42. docs/tutorials/06-document-qa.md +91 -0
  43. docs/tutorials/07-layout-analysis.ipynb +288 -0
  44. docs/tutorials/07-layout-analysis.md +66 -0
  45. docs/tutorials/07-working-with-regions.ipynb +413 -0
  46. docs/tutorials/07-working-with-regions.md +151 -0
  47. docs/tutorials/08-spatial-navigation.ipynb +508 -0
  48. docs/tutorials/08-spatial-navigation.md +190 -0
  49. docs/tutorials/09-section-extraction.ipynb +2434 -0
  50. docs/tutorials/09-section-extraction.md +256 -0
  51. docs/tutorials/10-form-field-extraction.ipynb +512 -0
  52. docs/tutorials/10-form-field-extraction.md +201 -0
  53. docs/tutorials/11-enhanced-table-processing.ipynb +54 -0
  54. docs/tutorials/11-enhanced-table-processing.md +9 -0
  55. docs/tutorials/12-ocr-integration.ipynb +604 -0
  56. docs/tutorials/12-ocr-integration.md +175 -0
  57. docs/tutorials/13-semantic-search.ipynb +1328 -0
  58. docs/tutorials/13-semantic-search.md +77 -0
  59. docs/visual-debugging/index.ipynb +2970 -0
  60. docs/visual-debugging/index.md +157 -0
  61. docs/visual-debugging/region.png +0 -0
  62. natural_pdf/__init__.py +50 -33
  63. natural_pdf/analyzers/__init__.py +2 -1
  64. natural_pdf/analyzers/layout/base.py +32 -24
  65. natural_pdf/analyzers/layout/docling.py +131 -72
  66. natural_pdf/analyzers/layout/gemini.py +264 -0
  67. natural_pdf/analyzers/layout/layout_analyzer.py +156 -113
  68. natural_pdf/analyzers/layout/layout_manager.py +125 -58
  69. natural_pdf/analyzers/layout/layout_options.py +43 -17
  70. natural_pdf/analyzers/layout/paddle.py +152 -95
  71. natural_pdf/analyzers/layout/surya.py +164 -92
  72. natural_pdf/analyzers/layout/tatr.py +149 -84
  73. natural_pdf/analyzers/layout/yolo.py +89 -45
  74. natural_pdf/analyzers/text_options.py +22 -15
  75. natural_pdf/analyzers/text_structure.py +131 -85
  76. natural_pdf/analyzers/utils.py +30 -23
  77. natural_pdf/collections/pdf_collection.py +146 -97
  78. natural_pdf/core/__init__.py +1 -1
  79. natural_pdf/core/element_manager.py +419 -337
  80. natural_pdf/core/highlighting_service.py +268 -196
  81. natural_pdf/core/page.py +1044 -521
  82. natural_pdf/core/pdf.py +516 -313
  83. natural_pdf/elements/__init__.py +1 -1
  84. natural_pdf/elements/base.py +307 -225
  85. natural_pdf/elements/collections.py +805 -543
  86. natural_pdf/elements/line.py +39 -36
  87. natural_pdf/elements/rect.py +32 -30
  88. natural_pdf/elements/region.py +889 -879
  89. natural_pdf/elements/text.py +127 -99
  90. natural_pdf/exporters/__init__.py +0 -1
  91. natural_pdf/exporters/searchable_pdf.py +261 -102
  92. natural_pdf/ocr/__init__.py +57 -35
  93. natural_pdf/ocr/engine.py +150 -46
  94. natural_pdf/ocr/engine_easyocr.py +146 -150
  95. natural_pdf/ocr/engine_paddle.py +118 -175
  96. natural_pdf/ocr/engine_surya.py +78 -141
  97. natural_pdf/ocr/ocr_factory.py +114 -0
  98. natural_pdf/ocr/ocr_manager.py +122 -124
  99. natural_pdf/ocr/ocr_options.py +16 -20
  100. natural_pdf/ocr/utils.py +98 -0
  101. natural_pdf/qa/__init__.py +1 -1
  102. natural_pdf/qa/document_qa.py +119 -111
  103. natural_pdf/search/__init__.py +37 -31
  104. natural_pdf/search/haystack_search_service.py +312 -189
  105. natural_pdf/search/haystack_utils.py +186 -122
  106. natural_pdf/search/search_options.py +25 -14
  107. natural_pdf/search/search_service_protocol.py +12 -6
  108. natural_pdf/search/searchable_mixin.py +261 -176
  109. natural_pdf/selectors/__init__.py +2 -1
  110. natural_pdf/selectors/parser.py +159 -316
  111. natural_pdf/templates/__init__.py +1 -1
  112. natural_pdf/templates/spa/css/style.css +334 -0
  113. natural_pdf/templates/spa/index.html +31 -0
  114. natural_pdf/templates/spa/js/app.js +472 -0
  115. natural_pdf/templates/spa/words.txt +235976 -0
  116. natural_pdf/utils/debug.py +32 -0
  117. natural_pdf/utils/highlighting.py +8 -2
  118. natural_pdf/utils/identifiers.py +29 -0
  119. natural_pdf/utils/packaging.py +418 -0
  120. natural_pdf/utils/reading_order.py +65 -63
  121. natural_pdf/utils/text_extraction.py +195 -0
  122. natural_pdf/utils/visualization.py +70 -61
  123. natural_pdf/widgets/__init__.py +2 -3
  124. natural_pdf/widgets/viewer.py +749 -718
  125. {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/METADATA +53 -17
  126. natural_pdf-0.1.6.dist-info/RECORD +141 -0
  127. {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/WHEEL +1 -1
  128. natural_pdf-0.1.6.dist-info/top_level.txt +4 -0
  129. notebooks/Examples.ipynb +1293 -0
  130. pdfs/.gitkeep +0 -0
  131. pdfs/01-practice.pdf +543 -0
  132. pdfs/0500000US42001.pdf +0 -0
  133. pdfs/0500000US42007.pdf +0 -0
  134. pdfs/2014 Statistics.pdf +0 -0
  135. pdfs/2019 Statistics.pdf +0 -0
  136. pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
  137. pdfs/needs-ocr.pdf +0 -0
  138. natural_pdf/templates/ocr_debug.html +0 -517
  139. natural_pdf-0.1.4.dist-info/RECORD +0 -61
  140. natural_pdf-0.1.4.dist-info/top_level.txt +0 -1
  141. {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,157 @@
1
+ # Visual Debugging
2
+
3
+ Sometimes it's hard to understand what's happening when working with PDFs. Natural PDF provides powerful visual debugging tools to help you see what you're extracting.
4
+
5
+ ## Adding Persistent Highlights
6
+
7
+ Use the `.highlight()` method on `Element` or `ElementCollection` objects to add persistent highlights to a page. These highlights are stored and will appear when viewing the page later.
8
+
9
+ ```python
10
+ from natural_pdf import PDF
11
+
12
+ pdf = PDF("https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/01-practice.pdf")
13
+ page = pdf.pages[0]
14
+
15
+ # Find matching elements and add persistent highlights
16
+ page.find_all('text:contains("Summary")').highlight()
17
+ page.find_all('text:contains("Date")').highlight()
18
+ page.find_all('line').highlight()
19
+ page.to_image(width=700)
20
+ ```
21
+
22
+ ## Customizing Persistent Highlights
23
+
24
+ Customize the appearance of persistent highlights added with `.highlight()`:
25
+
26
+ ```python
27
+ page.clear_highlights()
28
+
29
+ title = page.find('text:bold[size>=12]')
30
+
31
+ # Highlight with a specific color (string name, hex, or RGB/RGBA tuple)
32
+ # title.highlight(color=(1, 0, 0, 0.3)) # Red with 30% opacity
33
+ # title.highlight(color="#FF0000") # Hex color
34
+ title.highlight(color="red") # Color name
35
+
36
+ text = page.find('text:contains("Critical")')
37
+
38
+ # Add a label to the highlight (appears in legend)
39
+ text.highlight(label="Critical")
40
+
41
+ # Combine color and label
42
+ rect = page.find('rect')
43
+ rect.highlight(color=(0, 0, 1, 0.2), label="Box")
44
+
45
+ page.to_image(width=700)
46
+ ```
47
+
48
+ ## Highlighting Multiple Elements
49
+
50
+ Highlighting an `ElementCollection` applies the highlight to all elements within it. By default, all elements in the collection get the same color and a label based on their type.
51
+
52
+ ```python
53
+ # Find and highlight all headings with a single color/label
54
+ headings = page.find_all('text[size>=14]:bold')
55
+ headings.highlight(color=(0, 0.5, 0, 0.3), label="Headings")
56
+
57
+ # Find and highlight all tables
58
+ tables = page.find_all('region[type=table]')
59
+ tables.highlight(color=(0, 0, 1, 0.2), label="Tables")
60
+
61
+ # View the result
62
+ page.viewer()
63
+ ```
64
+
65
+ ## Highlighting Regions
66
+
67
+ You can highlight regions to see what area you're working with:
68
+
69
+ ```python
70
+ # Find a title and create a region below it
71
+ title = page.find('text:contains("Violations")')
72
+ content = title.below(height=200)
73
+
74
+ # Highlight the region
75
+ content.show()
76
+ ```
77
+
78
+ Or look at just the region by itself:
79
+
80
+ ```python
81
+ # Find a title and create a region below it
82
+ title = page.find('text:contains("Violations")')
83
+ content = title.below(height=200)
84
+
85
+ # Crop to the region
86
+ content.to_image(crop_only=True, include_highlights=False)
87
+ ```
88
+
89
+ ## Working with Text Styles
90
+
91
+ Visualize text styles to understand the document structure:
92
+
93
+ ```python
94
+ # Analyze and highlight text styles
95
+ page.clear_highlights()
96
+
97
+ page.analyze_text_styles()
98
+ page.find_all('text').highlight(group_by='style_label')
99
+
100
+ page.to_image(width=700)
101
+ ```
102
+
103
+ ## Displaying Attributes
104
+
105
+ You can display element attributes directly on the highlights:
106
+
107
+ ```python
108
+ pdf = PDF("https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/Atlanta_Public_Schools_GA_sample.pdf")
109
+ page = pdf.pages[0]
110
+
111
+ text = page.find_all('line')
112
+ text.highlight(include_attrs=['width', 'color'])
113
+
114
+ page.to_image(width=700)
115
+ ```
116
+
117
+ Does it get busy? YES.
118
+
119
+ ## Clearing Highlights
120
+
121
+ You can clear persistent highlights from a page:
122
+
123
+ ```python
124
+ # Clear all highlights on the page
125
+ page.clear_highlights()
126
+
127
+ # Apply new highlights
128
+ page.find_all('text:bold').highlight(label="Bold Text")
129
+ page.viewer()
130
+ ```
131
+
132
+ ## Document QA Visualization
133
+
134
+ Visualize document QA results:
135
+
136
+ ```python
137
+ pdf = PDF("https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/0500000US42007.pdf")
138
+ page = pdf.pages[0]
139
+ page.to_image(width=700)
140
+ ```
141
+
142
+ ```python
143
+ response = page.ask("How many votes did Kamala Harris get on Election Day?")
144
+ response
145
+ ```
146
+
147
+ ```python
148
+ response['source_elements'].show()
149
+ ```
150
+
151
+ ## Next Steps
152
+
153
+ Now that you know how to visualize PDF content, you might want to explore:
154
+
155
+ - [OCR capabilities](../ocr/index.md) for working with scanned documents
156
+ - [Layout analysis](../layout-analysis/index.ipynb) for automatic structure detection
157
+ - [Document QA](../document-qa/index.ipynb) for asking questions directly to your documents
Binary file
natural_pdf/__init__.py CHANGED
@@ -1,6 +1,7 @@
1
1
  """
2
2
  Natural PDF - A more intuitive interface for working with PDFs.
3
3
  """
4
+
4
5
  import logging
5
6
 
6
7
  # Create library logger
@@ -10,67 +11,83 @@ logger = logging.getLogger("natural_pdf")
10
11
  # (Best practice for libraries)
11
12
  logger.addHandler(logging.NullHandler())
12
13
 
13
- # Utility function for users to easily configure logging
14
+
14
15
  def configure_logging(level=logging.INFO, handler=None):
15
- """Configure Natural PDF's logging.
16
-
16
+ """Configure logging for the natural_pdf package.
17
+
17
18
  Args:
18
- level: The logging level (e.g., logging.INFO, logging.DEBUG)
19
- handler: A custom handler, or None to use StreamHandler
19
+ level: Logging level (e.g., logging.INFO, logging.DEBUG)
20
+ handler: Optional custom handler. Defaults to a StreamHandler.
20
21
  """
21
- # Remove NullHandler if present
22
- if logger.handlers and isinstance(logger.handlers[0], logging.NullHandler):
23
- logger.removeHandler(logger.handlers[0])
24
-
22
+ # Avoid adding duplicate handlers
23
+ if any(isinstance(h, logging.StreamHandler) for h in logger.handlers):
24
+ return
25
+
25
26
  if handler is None:
26
27
  handler = logging.StreamHandler()
27
- formatter = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
28
+ formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s")
28
29
  handler.setFormatter(formatter)
29
-
30
+
30
31
  logger.addHandler(handler)
31
32
  logger.setLevel(level)
32
-
33
- # Propagate level to all child loggers
34
- for name in logging.root.manager.loggerDict:
35
- if name.startswith("natural_pdf."):
36
- logging.getLogger(name).setLevel(level)
37
33
 
38
- from natural_pdf.core.pdf import PDF
34
+ logger.propagate = False
35
+
39
36
  from natural_pdf.core.page import Page
40
- from natural_pdf.elements.region import Region
37
+ from natural_pdf.core.pdf import PDF
41
38
  from natural_pdf.elements.collections import ElementCollection
39
+ from natural_pdf.elements.region import Region
42
40
 
43
41
  # Import QA module if available
44
42
  try:
45
43
  from natural_pdf.qa import DocumentQA, get_qa_engine
44
+
46
45
  HAS_QA = True
47
46
  except ImportError:
48
47
  HAS_QA = False
49
48
 
50
49
  __version__ = "0.1.1"
51
50
 
51
+ __all__ = [
52
+ "PDF",
53
+ "PDFCollection",
54
+ "Page",
55
+ "Region",
56
+ "ElementCollection",
57
+ "TextSearchOptions",
58
+ "MultiModalSearchOptions",
59
+ "BaseSearchOptions",
60
+ "configure_logging",
61
+ ]
62
+
52
63
  if HAS_QA:
53
- __all__ = ["PDF", "Page", "Region", "ElementCollection", "configure_logging", "DocumentQA", "get_qa_engine"]
54
- else:
55
- __all__ = ["PDF", "Page", "Region", "ElementCollection", "configure_logging"]
64
+ __all__.extend(["DocumentQA", "get_qa_engine"])
65
+
66
+
67
+ from .collections.pdf_collection import PDFCollection
56
68
 
57
69
  # Core classes
58
70
  from .core.pdf import PDF
59
- from .collections.pdf_collection import PDFCollection
60
71
  from .elements.region import Region
61
72
 
62
73
  # Search options (if extras installed)
63
74
  try:
64
- from .search.search_options import TextSearchOptions, MultiModalSearchOptions, BaseSearchOptions
75
+ from .search.search_options import BaseSearchOptions, MultiModalSearchOptions, TextSearchOptions
65
76
  except ImportError:
66
77
  # Define dummy classes if extras not installed, so imports don't break
67
78
  # but using them will raise the ImportError from check_haystack_availability
68
79
  class TextSearchOptions:
69
- def __init__(self, *args, **kwargs): pass
80
+ def __init__(self, *args, **kwargs):
81
+ pass
82
+
70
83
  class MultiModalSearchOptions:
71
- def __init__(self, *args, **kwargs): pass
84
+ def __init__(self, *args, **kwargs):
85
+ pass
86
+
72
87
  class BaseSearchOptions:
73
- def __init__(self, *args, **kwargs): pass
88
+ def __init__(self, *args, **kwargs):
89
+ pass
90
+
74
91
 
75
92
  # Expose logging setup? (Optional)
76
93
  # from . import logging_config
@@ -78,10 +95,10 @@ except ImportError:
78
95
 
79
96
  # Explicitly define what gets imported with 'from natural_pdf import *'
80
97
  __all__ = [
81
- 'PDF',
82
- 'PDFCollection',
83
- 'Region',
84
- 'TextSearchOptions', # Include search options
85
- 'MultiModalSearchOptions',
86
- 'BaseSearchOptions'
87
- ]
98
+ "PDF",
99
+ "PDFCollection",
100
+ "Region",
101
+ "TextSearchOptions", # Include search options
102
+ "MultiModalSearchOptions",
103
+ "BaseSearchOptions",
104
+ ]
@@ -1,6 +1,7 @@
1
1
  """
2
2
  Analyzers for natural-pdf.
3
3
  """
4
+
4
5
  from .layout import *
5
6
  from .text_structure import TextStyleAnalyzer
6
- from .utils import convert_to_regions
7
+ from .utils import convert_to_regions
@@ -1,7 +1,8 @@
1
1
  # layout_detector_base.py
2
2
  import logging
3
3
  from abc import ABC, abstractmethod
4
- from typing import Dict, List, Any, Optional, Set, Union
4
+ from typing import Any, Dict, List, Optional, Set, Union
5
+
5
6
  from PIL import Image
6
7
 
7
8
  # Assuming layout_options defines BaseLayoutOptions
@@ -9,10 +10,13 @@ try:
9
10
  from .layout_options import BaseLayoutOptions
10
11
  except ImportError:
11
12
  # Placeholder if run standalone or options not found
12
- class BaseLayoutOptions: pass
13
+ class BaseLayoutOptions:
14
+ pass
15
+
13
16
 
14
17
  logger = logging.getLogger(__name__)
15
18
 
19
+
16
20
  class LayoutDetector(ABC):
17
21
  """
18
22
  Abstract Base Class for layout detection engines.
@@ -26,8 +30,8 @@ class LayoutDetector(ABC):
26
30
  """Initializes the base layout detector."""
27
31
  self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
28
32
  self.logger.info(f"Initializing {self.__class__.__name__}")
29
- self.supported_classes: Set[str] = set() # Subclasses should populate this
30
- self._model_cache: Dict[str, Any] = {} # Cache for initialized models
33
+ self.supported_classes: Set[str] = set() # Subclasses should populate this
34
+ self._model_cache: Dict[str, Any] = {} # Cache for initialized models
31
35
 
32
36
  @abstractmethod
33
37
  def detect(self, image: Image.Image, options: BaseLayoutOptions) -> List[Dict[str, Any]]:
@@ -83,20 +87,20 @@ class LayoutDetector(ABC):
83
87
  """
84
88
  cache_key = self._get_cache_key(options)
85
89
  if cache_key not in self._model_cache:
86
- self.logger.info(f"Loading model for cache key: {cache_key}")
87
- try:
88
- # Ensure dependencies are met before loading
89
- if not self.is_available():
90
- raise RuntimeError(f"{self.__class__.__name__} dependencies are not met.")
91
- self._model_cache[cache_key] = self._load_model_from_options(options)
92
- self.logger.info(f"Model loaded successfully for key: {cache_key}")
93
- except Exception as e:
94
- self.logger.error(f"Failed to load model for key {cache_key}: {e}", exc_info=True)
95
- # Remove potentially corrupted cache entry
96
- self._model_cache.pop(cache_key, None)
97
- raise # Re-raise exception after logging
90
+ self.logger.info(f"Loading model for cache key: {cache_key}")
91
+ try:
92
+ # Ensure dependencies are met before loading
93
+ if not self.is_available():
94
+ raise RuntimeError(f"{self.__class__.__name__} dependencies are not met.")
95
+ self._model_cache[cache_key] = self._load_model_from_options(options)
96
+ self.logger.info(f"Model loaded successfully for key: {cache_key}")
97
+ except Exception as e:
98
+ self.logger.error(f"Failed to load model for key {cache_key}: {e}", exc_info=True)
99
+ # Remove potentially corrupted cache entry
100
+ self._model_cache.pop(cache_key, None)
101
+ raise # Re-raise exception after logging
98
102
  else:
99
- self.logger.debug(f"Using cached model for key: {cache_key}")
103
+ self.logger.debug(f"Using cached model for key: {cache_key}")
100
104
  return self._model_cache[cache_key]
101
105
 
102
106
  @abstractmethod
@@ -110,8 +114,9 @@ class LayoutDetector(ABC):
110
114
 
111
115
  def _normalize_class_name(self, name: str) -> str:
112
116
  """Convert class names with spaces/underscores to hyphenated lowercase format."""
113
- if not isinstance(name, str): name = str(name) # Ensure string
114
- return name.lower().replace(' ', '-').replace('_', '-')
117
+ if not isinstance(name, str):
118
+ name = str(name) # Ensure string
119
+ return name.lower().replace(" ", "-").replace("_", "-")
115
120
 
116
121
  def validate_classes(self, classes: List[str]) -> None:
117
122
  """
@@ -124,8 +129,10 @@ class LayoutDetector(ABC):
124
129
  ValueError: If any class is not supported.
125
130
  """
126
131
  if not self.supported_classes:
127
- self.logger.warning("Supported classes not defined for this detector. Skipping class validation.")
128
- return
132
+ self.logger.warning(
133
+ "Supported classes not defined for this detector. Skipping class validation."
134
+ )
135
+ return
129
136
 
130
137
  if classes:
131
138
  # Normalize both requested and supported classes for comparison
@@ -138,8 +145,10 @@ class LayoutDetector(ABC):
138
145
  unsupported_original = [
139
146
  c for c in classes if self._normalize_class_name(c) in unsupported_normalized
140
147
  ]
141
- raise ValueError(f"Classes not supported by {self.__class__.__name__}: {unsupported_original}. "
142
- f"Supported (normalized): {sorted(list(normalized_supported))}")
148
+ raise ValueError(
149
+ f"Classes not supported by {self.__class__.__name__}: {unsupported_original}. "
150
+ f"Supported (normalized): {sorted(list(normalized_supported))}"
151
+ )
143
152
 
144
153
  def __del__(self):
145
154
  """Cleanup resources."""
@@ -148,4 +157,3 @@ class LayoutDetector(ABC):
148
157
  # Consider implications if models are shared or expensive to reload
149
158
  # del self._model_cache # Optional: uncomment if models should be released aggressively
150
159
  self._model_cache.clear()
151
-