natural-pdf 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/api/index.md +386 -0
- docs/assets/favicon.png +3 -0
- docs/assets/favicon.svg +3 -0
- docs/assets/javascripts/custom.js +17 -0
- docs/assets/logo.svg +3 -0
- docs/assets/sample-screen.png +0 -0
- docs/assets/social-preview.png +17 -0
- docs/assets/social-preview.svg +17 -0
- docs/assets/stylesheets/custom.css +65 -0
- docs/document-qa/index.ipynb +435 -0
- docs/document-qa/index.md +79 -0
- docs/element-selection/index.ipynb +915 -0
- docs/element-selection/index.md +229 -0
- docs/index.md +170 -0
- docs/installation/index.md +69 -0
- docs/interactive-widget/index.ipynb +962 -0
- docs/interactive-widget/index.md +12 -0
- docs/layout-analysis/index.ipynb +818 -0
- docs/layout-analysis/index.md +185 -0
- docs/ocr/index.md +222 -0
- docs/pdf-navigation/index.ipynb +314 -0
- docs/pdf-navigation/index.md +97 -0
- docs/regions/index.ipynb +816 -0
- docs/regions/index.md +294 -0
- docs/tables/index.ipynb +658 -0
- docs/tables/index.md +144 -0
- docs/text-analysis/index.ipynb +370 -0
- docs/text-analysis/index.md +105 -0
- docs/text-extraction/index.ipynb +1478 -0
- docs/text-extraction/index.md +292 -0
- docs/tutorials/01-loading-and-extraction.ipynb +1696 -0
- docs/tutorials/01-loading-and-extraction.md +95 -0
- docs/tutorials/02-finding-elements.ipynb +340 -0
- docs/tutorials/02-finding-elements.md +149 -0
- docs/tutorials/03-extracting-blocks.ipynb +147 -0
- docs/tutorials/03-extracting-blocks.md +48 -0
- docs/tutorials/04-table-extraction.ipynb +114 -0
- docs/tutorials/04-table-extraction.md +50 -0
- docs/tutorials/05-excluding-content.ipynb +270 -0
- docs/tutorials/05-excluding-content.md +109 -0
- docs/tutorials/06-document-qa.ipynb +332 -0
- docs/tutorials/06-document-qa.md +91 -0
- docs/tutorials/07-layout-analysis.ipynb +260 -0
- docs/tutorials/07-layout-analysis.md +66 -0
- docs/tutorials/07-working-with-regions.ipynb +409 -0
- docs/tutorials/07-working-with-regions.md +151 -0
- docs/tutorials/08-spatial-navigation.ipynb +508 -0
- docs/tutorials/08-spatial-navigation.md +190 -0
- docs/tutorials/09-section-extraction.ipynb +2434 -0
- docs/tutorials/09-section-extraction.md +256 -0
- docs/tutorials/10-form-field-extraction.ipynb +484 -0
- docs/tutorials/10-form-field-extraction.md +201 -0
- docs/tutorials/11-enhanced-table-processing.ipynb +54 -0
- docs/tutorials/11-enhanced-table-processing.md +9 -0
- docs/tutorials/12-ocr-integration.ipynb +586 -0
- docs/tutorials/12-ocr-integration.md +188 -0
- docs/tutorials/13-semantic-search.ipynb +1888 -0
- docs/tutorials/13-semantic-search.md +77 -0
- docs/visual-debugging/index.ipynb +2970 -0
- docs/visual-debugging/index.md +157 -0
- docs/visual-debugging/region.png +0 -0
- natural_pdf/__init__.py +39 -20
- natural_pdf/analyzers/__init__.py +2 -1
- natural_pdf/analyzers/layout/base.py +32 -24
- natural_pdf/analyzers/layout/docling.py +131 -72
- natural_pdf/analyzers/layout/layout_analyzer.py +156 -113
- natural_pdf/analyzers/layout/layout_manager.py +98 -58
- natural_pdf/analyzers/layout/layout_options.py +32 -17
- natural_pdf/analyzers/layout/paddle.py +152 -95
- natural_pdf/analyzers/layout/surya.py +164 -92
- natural_pdf/analyzers/layout/tatr.py +149 -84
- natural_pdf/analyzers/layout/yolo.py +84 -44
- natural_pdf/analyzers/text_options.py +22 -15
- natural_pdf/analyzers/text_structure.py +131 -85
- natural_pdf/analyzers/utils.py +30 -23
- natural_pdf/collections/pdf_collection.py +126 -98
- natural_pdf/core/__init__.py +1 -1
- natural_pdf/core/element_manager.py +416 -337
- natural_pdf/core/highlighting_service.py +268 -196
- natural_pdf/core/page.py +910 -516
- natural_pdf/core/pdf.py +387 -289
- natural_pdf/elements/__init__.py +1 -1
- natural_pdf/elements/base.py +302 -214
- natural_pdf/elements/collections.py +714 -514
- natural_pdf/elements/line.py +39 -36
- natural_pdf/elements/rect.py +32 -30
- natural_pdf/elements/region.py +854 -883
- natural_pdf/elements/text.py +122 -99
- natural_pdf/exporters/__init__.py +0 -1
- natural_pdf/exporters/searchable_pdf.py +261 -102
- natural_pdf/ocr/__init__.py +23 -14
- natural_pdf/ocr/engine.py +17 -8
- natural_pdf/ocr/engine_easyocr.py +63 -47
- natural_pdf/ocr/engine_paddle.py +97 -68
- natural_pdf/ocr/engine_surya.py +54 -44
- natural_pdf/ocr/ocr_manager.py +88 -62
- natural_pdf/ocr/ocr_options.py +16 -10
- natural_pdf/qa/__init__.py +1 -1
- natural_pdf/qa/document_qa.py +119 -111
- natural_pdf/search/__init__.py +37 -31
- natural_pdf/search/haystack_search_service.py +312 -189
- natural_pdf/search/haystack_utils.py +186 -122
- natural_pdf/search/search_options.py +25 -14
- natural_pdf/search/search_service_protocol.py +12 -6
- natural_pdf/search/searchable_mixin.py +261 -176
- natural_pdf/selectors/__init__.py +2 -1
- natural_pdf/selectors/parser.py +159 -316
- natural_pdf/templates/__init__.py +1 -1
- natural_pdf/utils/highlighting.py +8 -2
- natural_pdf/utils/reading_order.py +65 -63
- natural_pdf/utils/text_extraction.py +195 -0
- natural_pdf/utils/visualization.py +70 -61
- natural_pdf/widgets/__init__.py +2 -3
- natural_pdf/widgets/viewer.py +749 -718
- {natural_pdf-0.1.3.dist-info → natural_pdf-0.1.5.dist-info}/METADATA +29 -15
- natural_pdf-0.1.5.dist-info/RECORD +134 -0
- natural_pdf-0.1.5.dist-info/top_level.txt +5 -0
- notebooks/Examples.ipynb +1293 -0
- pdfs/.gitkeep +0 -0
- pdfs/01-practice.pdf +543 -0
- pdfs/0500000US42001.pdf +0 -0
- pdfs/0500000US42007.pdf +0 -0
- pdfs/2014 Statistics.pdf +0 -0
- pdfs/2019 Statistics.pdf +0 -0
- pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
- pdfs/needs-ocr.pdf +0 -0
- tests/test_loading.py +50 -0
- tests/test_optional_deps.py +298 -0
- natural_pdf-0.1.3.dist-info/RECORD +0 -61
- natural_pdf-0.1.3.dist-info/top_level.txt +0 -1
- {natural_pdf-0.1.3.dist-info → natural_pdf-0.1.5.dist-info}/WHEEL +0 -0
- {natural_pdf-0.1.3.dist-info → natural_pdf-0.1.5.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,157 @@
|
|
1
|
+
# Visual Debugging
|
2
|
+
|
3
|
+
Sometimes it's hard to understand what's happening when working with PDFs. Natural PDF provides powerful visual debugging tools to help you see what you're extracting.
|
4
|
+
|
5
|
+
## Adding Persistent Highlights
|
6
|
+
|
7
|
+
Use the `.highlight()` method on `Element` or `ElementCollection` objects to add persistent highlights to a page. These highlights are stored and will appear when viewing the page later.
|
8
|
+
|
9
|
+
```python
|
10
|
+
from natural_pdf import PDF
|
11
|
+
|
12
|
+
pdf = PDF("https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/01-practice.pdf")
|
13
|
+
page = pdf.pages[0]
|
14
|
+
|
15
|
+
# Find specific elements and add persistent highlights to each
|
16
|
+
page.find_all('text:contains("Summary")').highlight()
|
17
|
+
page.find_all('text:contains("Date")').highlight()
|
18
|
+
page.find_all('line').highlight()
|
19
|
+
page.to_image(width=700)
|
20
|
+
```
|
21
|
+
|
22
|
+
## Customizing Persistent Highlights
|
23
|
+
|
24
|
+
Customize the appearance of persistent highlights added with `.highlight()`:
|
25
|
+
|
26
|
+
```python
|
27
|
+
page.clear_highlights()
|
28
|
+
|
29
|
+
title = page.find('text:bold[size>=12]')
|
30
|
+
|
31
|
+
# Highlight with a specific color (string name, hex, or RGB/RGBA tuple)
|
32
|
+
# title.highlight(color=(1, 0, 0, 0.3)) # Red with 30% opacity
|
33
|
+
# title.highlight(color="#FF0000") # Hex color
|
34
|
+
title.highlight(color="red") # Color name
|
35
|
+
|
36
|
+
text = page.find('text:contains("Critical")')
|
37
|
+
|
38
|
+
# Add a label to the highlight (appears in legend)
|
39
|
+
text.highlight(label="Critical")
|
40
|
+
|
41
|
+
# Combine color and label
|
42
|
+
rect = page.find('rect')
|
43
|
+
rect.highlight(color=(0, 0, 1, 0.2), label="Box")
|
44
|
+
|
45
|
+
page.to_image(width=700)
|
46
|
+
```
|
47
|
+
|
48
|
+
## Highlighting Multiple Elements
|
49
|
+
|
50
|
+
Highlighting an `ElementCollection` applies the highlight to all elements within it. By default, all elements in the collection get the same color and a label based on their type.
|
51
|
+
|
52
|
+
```python
|
53
|
+
# Find and highlight all headings with a single color/label
|
54
|
+
headings = page.find_all('text[size>=14]:bold')
|
55
|
+
headings.highlight(color=(0, 0.5, 0, 0.3), label="Headings")
|
56
|
+
|
57
|
+
# Find and highlight all tables
|
58
|
+
tables = page.find_all('region[type=table]')
|
59
|
+
tables.highlight(color=(0, 0, 1, 0.2), label="Tables")
|
60
|
+
|
61
|
+
# View the result
|
62
|
+
page.viewer()
|
63
|
+
```
|
64
|
+
|
65
|
+
## Highlighting Regions
|
66
|
+
|
67
|
+
You can highlight regions to see what area you're working with:
|
68
|
+
|
69
|
+
```python
|
70
|
+
# Find a title and create a region below it
|
71
|
+
title = page.find('text:contains("Violations")')
|
72
|
+
content = title.below(height=200)
|
73
|
+
|
74
|
+
# Highlight the region
|
75
|
+
content.show()
|
76
|
+
```
|
77
|
+
|
78
|
+
Or look at just the region by itself:
|
79
|
+
|
80
|
+
```python
|
81
|
+
# Find a title and create a region below it
|
82
|
+
title = page.find('text:contains("Violations")')
|
83
|
+
content = title.below(height=200)
|
84
|
+
|
85
|
+
# Crop to the region
|
86
|
+
content.to_image(crop_only=True, include_highlights=False)
|
87
|
+
```
|
88
|
+
|
89
|
+
## Working with Text Styles
|
90
|
+
|
91
|
+
Visualize text styles to understand the document structure:
|
92
|
+
|
93
|
+
```python
|
94
|
+
# Analyze and highlight text styles
|
95
|
+
page.clear_highlights()
|
96
|
+
|
97
|
+
page.analyze_text_styles()
|
98
|
+
page.find_all('text').highlight(group_by='style_label')
|
99
|
+
|
100
|
+
page.to_image(width=700)
|
101
|
+
```
|
102
|
+
|
103
|
+
## Displaying Attributes
|
104
|
+
|
105
|
+
You can display element attributes directly on the highlights:
|
106
|
+
|
107
|
+
```python
|
108
|
+
pdf = PDF("https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/Atlanta_Public_Schools_GA_sample.pdf")
|
109
|
+
page = pdf.pages[0]
|
110
|
+
|
111
|
+
text = page.find_all('line')
|
112
|
+
text.highlight(include_attrs=['width', 'color'])
|
113
|
+
|
114
|
+
page.to_image(width=700)
|
115
|
+
```
|
116
|
+
|
117
|
+
Does it get busy? YES.
|
118
|
+
|
119
|
+
## Clearing Highlights
|
120
|
+
|
121
|
+
You can clear persistent highlights from a page:
|
122
|
+
|
123
|
+
```python
|
124
|
+
# Clear all highlights on the page
|
125
|
+
page.clear_highlights()
|
126
|
+
|
127
|
+
# Apply new highlights
|
128
|
+
page.find_all('text:bold').highlight(label="Bold Text")
|
129
|
+
page.viewer()
|
130
|
+
```
|
131
|
+
|
132
|
+
## Document QA Visualization
|
133
|
+
|
134
|
+
Visualize document QA results:
|
135
|
+
|
136
|
+
```python
|
137
|
+
pdf = PDF("https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/0500000US42007.pdf")
|
138
|
+
page = pdf.pages[0]
|
139
|
+
page.to_image(width=700)
|
140
|
+
```
|
141
|
+
|
142
|
+
```python
|
143
|
+
response = page.ask("How many votes did Kamala Harris get on Election Day?")
|
144
|
+
response
|
145
|
+
```
|
146
|
+
|
147
|
+
```python
|
148
|
+
response['source_elements'].show()
|
149
|
+
```
|
150
|
+
|
151
|
+
## Next Steps
|
152
|
+
|
153
|
+
Now that you know how to visualize PDF content, you might want to explore:
|
154
|
+
|
155
|
+
- [OCR capabilities](../ocr/index.md) for working with scanned documents
|
156
|
+
- [Layout analysis](../layout-analysis/index.ipynb) for automatic structure detection
|
157
|
+
- [Document QA](../document-qa/index.ipynb) for asking questions of your documents directly
|
Binary file
|
natural_pdf/__init__.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
"""
|
2
2
|
Natural PDF - A more intuitive interface for working with PDFs.
|
3
3
|
"""
|
4
|
+
|
4
5
|
import logging
|
5
6
|
|
6
7
|
# Create library logger
|
@@ -10,10 +11,11 @@ logger = logging.getLogger("natural_pdf")
|
|
10
11
|
# (Best practice for libraries)
|
11
12
|
logger.addHandler(logging.NullHandler())
|
12
13
|
|
14
|
+
|
13
15
|
# Utility function for users to easily configure logging
|
14
16
|
def configure_logging(level=logging.INFO, handler=None):
|
15
17
|
"""Configure Natural PDF's logging.
|
16
|
-
|
18
|
+
|
17
19
|
Args:
|
18
20
|
level: The logging level (e.g., logging.INFO, logging.DEBUG)
|
19
21
|
handler: A custom handler, or None to use StreamHandler
|
@@ -21,28 +23,30 @@ def configure_logging(level=logging.INFO, handler=None):
|
|
21
23
|
# Remove NullHandler if present
|
22
24
|
if logger.handlers and isinstance(logger.handlers[0], logging.NullHandler):
|
23
25
|
logger.removeHandler(logger.handlers[0])
|
24
|
-
|
26
|
+
|
25
27
|
if handler is None:
|
26
28
|
handler = logging.StreamHandler()
|
27
|
-
formatter = logging.Formatter(
|
29
|
+
formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s")
|
28
30
|
handler.setFormatter(formatter)
|
29
|
-
|
31
|
+
|
30
32
|
logger.addHandler(handler)
|
31
33
|
logger.setLevel(level)
|
32
|
-
|
34
|
+
|
33
35
|
# Propagate level to all child loggers
|
34
36
|
for name in logging.root.manager.loggerDict:
|
35
37
|
if name.startswith("natural_pdf."):
|
36
38
|
logging.getLogger(name).setLevel(level)
|
37
39
|
|
38
|
-
|
40
|
+
|
39
41
|
from natural_pdf.core.page import Page
|
40
|
-
from natural_pdf.
|
42
|
+
from natural_pdf.core.pdf import PDF
|
41
43
|
from natural_pdf.elements.collections import ElementCollection
|
44
|
+
from natural_pdf.elements.region import Region
|
42
45
|
|
43
46
|
# Import QA module if available
|
44
47
|
try:
|
45
48
|
from natural_pdf.qa import DocumentQA, get_qa_engine
|
49
|
+
|
46
50
|
HAS_QA = True
|
47
51
|
except ImportError:
|
48
52
|
HAS_QA = False
|
@@ -50,27 +54,42 @@ except ImportError:
|
|
50
54
|
__version__ = "0.1.1"
|
51
55
|
|
52
56
|
if HAS_QA:
|
53
|
-
__all__ = [
|
57
|
+
__all__ = [
|
58
|
+
"PDF",
|
59
|
+
"Page",
|
60
|
+
"Region",
|
61
|
+
"ElementCollection",
|
62
|
+
"configure_logging",
|
63
|
+
"DocumentQA",
|
64
|
+
"get_qa_engine",
|
65
|
+
]
|
54
66
|
else:
|
55
67
|
__all__ = ["PDF", "Page", "Region", "ElementCollection", "configure_logging"]
|
56
68
|
|
69
|
+
from .collections.pdf_collection import PDFCollection
|
70
|
+
|
57
71
|
# Core classes
|
58
72
|
from .core.pdf import PDF
|
59
|
-
from .collections.pdf_collection import PDFCollection
|
60
73
|
from .elements.region import Region
|
61
74
|
|
62
75
|
# Search options (if extras installed)
|
63
76
|
try:
|
64
|
-
from .search.search_options import
|
77
|
+
from .search.search_options import BaseSearchOptions, MultiModalSearchOptions, TextSearchOptions
|
65
78
|
except ImportError:
|
66
79
|
# Define dummy classes if extras not installed, so imports don't break
|
67
80
|
# but using them will raise the ImportError from check_haystack_availability
|
68
81
|
class TextSearchOptions:
|
69
|
-
def __init__(self, *args, **kwargs):
|
82
|
+
def __init__(self, *args, **kwargs):
|
83
|
+
pass
|
84
|
+
|
70
85
|
class MultiModalSearchOptions:
|
71
|
-
def __init__(self, *args, **kwargs):
|
86
|
+
def __init__(self, *args, **kwargs):
|
87
|
+
pass
|
88
|
+
|
72
89
|
class BaseSearchOptions:
|
73
|
-
def __init__(self, *args, **kwargs):
|
90
|
+
def __init__(self, *args, **kwargs):
|
91
|
+
pass
|
92
|
+
|
74
93
|
|
75
94
|
# Expose logging setup? (Optional)
|
76
95
|
# from . import logging_config
|
@@ -78,10 +97,10 @@ except ImportError:
|
|
78
97
|
|
79
98
|
# Explicitly define what gets imported with 'from natural_pdf import *'
|
80
99
|
__all__ = [
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
]
|
100
|
+
"PDF",
|
101
|
+
"PDFCollection",
|
102
|
+
"Region",
|
103
|
+
"TextSearchOptions", # Include search options
|
104
|
+
"MultiModalSearchOptions",
|
105
|
+
"BaseSearchOptions",
|
106
|
+
]
|
@@ -1,7 +1,8 @@
|
|
1
1
|
# layout_detector_base.py
|
2
2
|
import logging
|
3
3
|
from abc import ABC, abstractmethod
|
4
|
-
from typing import Dict, List,
|
4
|
+
from typing import Any, Dict, List, Optional, Set, Union
|
5
|
+
|
5
6
|
from PIL import Image
|
6
7
|
|
7
8
|
# Assuming layout_options defines BaseLayoutOptions
|
@@ -9,10 +10,13 @@ try:
|
|
9
10
|
from .layout_options import BaseLayoutOptions
|
10
11
|
except ImportError:
|
11
12
|
# Placeholder if run standalone or options not found
|
12
|
-
class BaseLayoutOptions:
|
13
|
+
class BaseLayoutOptions:
|
14
|
+
pass
|
15
|
+
|
13
16
|
|
14
17
|
logger = logging.getLogger(__name__)
|
15
18
|
|
19
|
+
|
16
20
|
class LayoutDetector(ABC):
|
17
21
|
"""
|
18
22
|
Abstract Base Class for layout detection engines.
|
@@ -26,8 +30,8 @@ class LayoutDetector(ABC):
|
|
26
30
|
"""Initializes the base layout detector."""
|
27
31
|
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
|
28
32
|
self.logger.info(f"Initializing {self.__class__.__name__}")
|
29
|
-
self.supported_classes: Set[str] = set()
|
30
|
-
self._model_cache: Dict[str, Any] = {}
|
33
|
+
self.supported_classes: Set[str] = set() # Subclasses should populate this
|
34
|
+
self._model_cache: Dict[str, Any] = {} # Cache for initialized models
|
31
35
|
|
32
36
|
@abstractmethod
|
33
37
|
def detect(self, image: Image.Image, options: BaseLayoutOptions) -> List[Dict[str, Any]]:
|
@@ -83,20 +87,20 @@ class LayoutDetector(ABC):
|
|
83
87
|
"""
|
84
88
|
cache_key = self._get_cache_key(options)
|
85
89
|
if cache_key not in self._model_cache:
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
90
|
+
self.logger.info(f"Loading model for cache key: {cache_key}")
|
91
|
+
try:
|
92
|
+
# Ensure dependencies are met before loading
|
93
|
+
if not self.is_available():
|
94
|
+
raise RuntimeError(f"{self.__class__.__name__} dependencies are not met.")
|
95
|
+
self._model_cache[cache_key] = self._load_model_from_options(options)
|
96
|
+
self.logger.info(f"Model loaded successfully for key: {cache_key}")
|
97
|
+
except Exception as e:
|
98
|
+
self.logger.error(f"Failed to load model for key {cache_key}: {e}", exc_info=True)
|
99
|
+
# Remove potentially corrupted cache entry
|
100
|
+
self._model_cache.pop(cache_key, None)
|
101
|
+
raise # Re-raise exception after logging
|
98
102
|
else:
|
99
|
-
|
103
|
+
self.logger.debug(f"Using cached model for key: {cache_key}")
|
100
104
|
return self._model_cache[cache_key]
|
101
105
|
|
102
106
|
@abstractmethod
|
@@ -110,8 +114,9 @@ class LayoutDetector(ABC):
|
|
110
114
|
|
111
115
|
def _normalize_class_name(self, name: str) -> str:
|
112
116
|
"""Convert class names with spaces/underscores to hyphenated lowercase format."""
|
113
|
-
if not isinstance(name, str):
|
114
|
-
|
117
|
+
if not isinstance(name, str):
|
118
|
+
name = str(name) # Ensure string
|
119
|
+
return name.lower().replace(" ", "-").replace("_", "-")
|
115
120
|
|
116
121
|
def validate_classes(self, classes: List[str]) -> None:
|
117
122
|
"""
|
@@ -124,8 +129,10 @@ class LayoutDetector(ABC):
|
|
124
129
|
ValueError: If any class is not supported.
|
125
130
|
"""
|
126
131
|
if not self.supported_classes:
|
127
|
-
|
128
|
-
|
132
|
+
self.logger.warning(
|
133
|
+
"Supported classes not defined for this detector. Skipping class validation."
|
134
|
+
)
|
135
|
+
return
|
129
136
|
|
130
137
|
if classes:
|
131
138
|
# Normalize both requested and supported classes for comparison
|
@@ -138,8 +145,10 @@ class LayoutDetector(ABC):
|
|
138
145
|
unsupported_original = [
|
139
146
|
c for c in classes if self._normalize_class_name(c) in unsupported_normalized
|
140
147
|
]
|
141
|
-
raise ValueError(
|
142
|
-
|
148
|
+
raise ValueError(
|
149
|
+
f"Classes not supported by {self.__class__.__name__}: {unsupported_original}. "
|
150
|
+
f"Supported (normalized): {sorted(list(normalized_supported))}"
|
151
|
+
)
|
143
152
|
|
144
153
|
def __del__(self):
|
145
154
|
"""Cleanup resources."""
|
@@ -148,4 +157,3 @@ class LayoutDetector(ABC):
|
|
148
157
|
# Consider implications if models are shared or expensive to reload
|
149
158
|
# del self._model_cache # Optional: uncomment if models should be released aggressively
|
150
159
|
self._model_cache.clear()
|
151
|
-
|