natural-pdf 0.1.40__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/__init__.py +6 -7
- natural_pdf/analyzers/__init__.py +6 -1
- natural_pdf/analyzers/guides.py +354 -258
- natural_pdf/analyzers/layout/layout_analyzer.py +2 -3
- natural_pdf/analyzers/layout/layout_manager.py +18 -4
- natural_pdf/analyzers/layout/paddle.py +11 -0
- natural_pdf/analyzers/layout/surya.py +2 -3
- natural_pdf/analyzers/shape_detection_mixin.py +25 -34
- natural_pdf/analyzers/text_structure.py +2 -2
- natural_pdf/classification/manager.py +1 -1
- natural_pdf/collections/mixins.py +3 -2
- natural_pdf/core/highlighting_service.py +743 -32
- natural_pdf/core/page.py +236 -383
- natural_pdf/core/page_collection.py +1249 -0
- natural_pdf/core/pdf.py +172 -83
- natural_pdf/{collections → core}/pdf_collection.py +18 -11
- natural_pdf/core/render_spec.py +335 -0
- natural_pdf/describe/base.py +1 -1
- natural_pdf/elements/__init__.py +1 -0
- natural_pdf/elements/base.py +108 -83
- natural_pdf/elements/{collections.py → element_collection.py} +566 -1487
- natural_pdf/elements/line.py +0 -1
- natural_pdf/elements/rect.py +0 -1
- natural_pdf/elements/region.py +318 -243
- natural_pdf/elements/text.py +9 -7
- natural_pdf/exporters/base.py +2 -2
- natural_pdf/exporters/original_pdf.py +1 -1
- natural_pdf/exporters/paddleocr.py +2 -4
- natural_pdf/exporters/searchable_pdf.py +3 -2
- natural_pdf/extraction/mixin.py +1 -3
- natural_pdf/flows/collections.py +1 -69
- natural_pdf/flows/element.py +4 -4
- natural_pdf/flows/flow.py +1200 -243
- natural_pdf/flows/region.py +707 -261
- natural_pdf/ocr/ocr_options.py +0 -2
- natural_pdf/ocr/utils.py +2 -1
- natural_pdf/qa/document_qa.py +21 -5
- natural_pdf/search/search_service_protocol.py +1 -1
- natural_pdf/selectors/parser.py +2 -2
- natural_pdf/tables/result.py +35 -1
- natural_pdf/text_mixin.py +7 -3
- natural_pdf/utils/debug.py +2 -1
- natural_pdf/utils/highlighting.py +1 -0
- natural_pdf/utils/layout.py +2 -2
- natural_pdf/utils/packaging.py +4 -3
- natural_pdf/utils/text_extraction.py +15 -12
- natural_pdf/utils/visualization.py +385 -0
- {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.1.dev0.dist-info}/METADATA +7 -3
- {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.1.dev0.dist-info}/RECORD +55 -53
- {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.1.dev0.dist-info}/top_level.txt +0 -2
- optimization/memory_comparison.py +1 -1
- optimization/pdf_analyzer.py +2 -2
- {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.1.dev0.dist-info}/WHEEL +0 -0
- {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.1.dev0.dist-info}/entry_points.txt +0 -0
- {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
natural_pdf/__init__.py
CHANGED
@@ -70,17 +70,16 @@ options = Options()
|
|
70
70
|
# Version
|
71
71
|
__version__ = "0.1.1"
|
72
72
|
|
73
|
-
|
74
|
-
from natural_pdf.collections.pdf_collection import PDFCollection
|
73
|
+
from natural_pdf.analyzers.guides import Guides
|
75
74
|
from natural_pdf.core.page import Page
|
75
|
+
from natural_pdf.core.page_collection import PageCollection
|
76
76
|
from natural_pdf.core.pdf import PDF
|
77
|
-
|
77
|
+
|
78
|
+
# Core imports
|
79
|
+
from natural_pdf.core.pdf_collection import PDFCollection
|
78
80
|
from natural_pdf.elements.region import Region
|
79
81
|
from natural_pdf.flows.flow import Flow
|
80
82
|
from natural_pdf.flows.region import FlowRegion
|
81
|
-
from natural_pdf.analyzers.guides import Guides
|
82
|
-
|
83
|
-
ElementCollection = None
|
84
83
|
|
85
84
|
# Search options (if extras installed)
|
86
85
|
try:
|
@@ -118,7 +117,6 @@ __all__ = [
|
|
118
117
|
"PDFCollection",
|
119
118
|
"Page",
|
120
119
|
"Region",
|
121
|
-
"ElementCollection",
|
122
120
|
"Flow",
|
123
121
|
"FlowRegion",
|
124
122
|
"Guides",
|
@@ -127,6 +125,7 @@ __all__ = [
|
|
127
125
|
"BaseSearchOptions",
|
128
126
|
"configure_logging",
|
129
127
|
"options",
|
128
|
+
"PageCollection",
|
130
129
|
]
|
131
130
|
|
132
131
|
# Add QA components to __all__ if available
|
@@ -8,23 +8,28 @@ from natural_pdf.analyzers.shape_detection_mixin import ShapeDetectionMixin
|
|
8
8
|
from natural_pdf.analyzers.text_options import TextStyleOptions
|
9
9
|
from natural_pdf.analyzers.text_structure import TextStyleAnalyzer
|
10
10
|
|
11
|
+
|
11
12
|
# Lazy imports to avoid circular dependencies
|
12
13
|
# These will be imported when actually accessed
|
13
14
|
def __getattr__(name):
|
14
15
|
if name == "LayoutAnalyzer":
|
15
16
|
from natural_pdf.analyzers.layout.layout_analyzer import LayoutAnalyzer
|
17
|
+
|
16
18
|
return LayoutAnalyzer
|
17
19
|
elif name == "LayoutManager":
|
18
20
|
from natural_pdf.analyzers.layout.layout_manager import LayoutManager
|
21
|
+
|
19
22
|
return LayoutManager
|
20
23
|
elif name == "LayoutOptions":
|
21
24
|
from natural_pdf.analyzers.layout.layout_options import LayoutOptions
|
25
|
+
|
22
26
|
return LayoutOptions
|
23
27
|
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
24
28
|
|
29
|
+
|
25
30
|
__all__ = [
|
26
31
|
"LayoutAnalyzer",
|
27
|
-
"LayoutManager",
|
32
|
+
"LayoutManager",
|
28
33
|
"LayoutOptions",
|
29
34
|
"ShapeDetectionMixin",
|
30
35
|
"TextStyleOptions",
|