natural-pdf 0.1.40__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. natural_pdf/__init__.py +6 -7
  2. natural_pdf/analyzers/__init__.py +6 -1
  3. natural_pdf/analyzers/guides.py +354 -258
  4. natural_pdf/analyzers/layout/layout_analyzer.py +2 -3
  5. natural_pdf/analyzers/layout/layout_manager.py +18 -4
  6. natural_pdf/analyzers/layout/paddle.py +11 -0
  7. natural_pdf/analyzers/layout/surya.py +2 -3
  8. natural_pdf/analyzers/shape_detection_mixin.py +25 -34
  9. natural_pdf/analyzers/text_structure.py +2 -2
  10. natural_pdf/classification/manager.py +1 -1
  11. natural_pdf/collections/mixins.py +3 -2
  12. natural_pdf/core/highlighting_service.py +743 -32
  13. natural_pdf/core/page.py +236 -383
  14. natural_pdf/core/page_collection.py +1249 -0
  15. natural_pdf/core/pdf.py +172 -83
  16. natural_pdf/{collections → core}/pdf_collection.py +18 -11
  17. natural_pdf/core/render_spec.py +335 -0
  18. natural_pdf/describe/base.py +1 -1
  19. natural_pdf/elements/__init__.py +1 -0
  20. natural_pdf/elements/base.py +108 -83
  21. natural_pdf/elements/{collections.py → element_collection.py} +566 -1487
  22. natural_pdf/elements/line.py +0 -1
  23. natural_pdf/elements/rect.py +0 -1
  24. natural_pdf/elements/region.py +318 -243
  25. natural_pdf/elements/text.py +9 -7
  26. natural_pdf/exporters/base.py +2 -2
  27. natural_pdf/exporters/original_pdf.py +1 -1
  28. natural_pdf/exporters/paddleocr.py +2 -4
  29. natural_pdf/exporters/searchable_pdf.py +3 -2
  30. natural_pdf/extraction/mixin.py +1 -3
  31. natural_pdf/flows/collections.py +1 -69
  32. natural_pdf/flows/element.py +4 -4
  33. natural_pdf/flows/flow.py +1200 -243
  34. natural_pdf/flows/region.py +707 -261
  35. natural_pdf/ocr/ocr_options.py +0 -2
  36. natural_pdf/ocr/utils.py +2 -1
  37. natural_pdf/qa/document_qa.py +21 -5
  38. natural_pdf/search/search_service_protocol.py +1 -1
  39. natural_pdf/selectors/parser.py +2 -2
  40. natural_pdf/tables/result.py +35 -1
  41. natural_pdf/text_mixin.py +7 -3
  42. natural_pdf/utils/debug.py +2 -1
  43. natural_pdf/utils/highlighting.py +1 -0
  44. natural_pdf/utils/layout.py +2 -2
  45. natural_pdf/utils/packaging.py +4 -3
  46. natural_pdf/utils/text_extraction.py +15 -12
  47. natural_pdf/utils/visualization.py +385 -0
  48. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.1.dev0.dist-info}/METADATA +7 -3
  49. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.1.dev0.dist-info}/RECORD +55 -53
  50. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.1.dev0.dist-info}/top_level.txt +0 -2
  51. optimization/memory_comparison.py +1 -1
  52. optimization/pdf_analyzer.py +2 -2
  53. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.1.dev0.dist-info}/WHEEL +0 -0
  54. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.1.dev0.dist-info}/entry_points.txt +0 -0
  55. {natural_pdf-0.1.40.dist-info → natural_pdf-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
natural_pdf/__init__.py CHANGED
@@ -70,17 +70,16 @@ options = Options()
70
70
  # Version
71
71
  __version__ = "0.1.1"
72
72
 
73
- # Core imports
74
- from natural_pdf.collections.pdf_collection import PDFCollection
73
+ from natural_pdf.analyzers.guides import Guides
75
74
  from natural_pdf.core.page import Page
75
+ from natural_pdf.core.page_collection import PageCollection
76
76
  from natural_pdf.core.pdf import PDF
77
- from natural_pdf.elements.collections import ElementCollection
77
+
78
+ # Core imports
79
+ from natural_pdf.core.pdf_collection import PDFCollection
78
80
  from natural_pdf.elements.region import Region
79
81
  from natural_pdf.flows.flow import Flow
80
82
  from natural_pdf.flows.region import FlowRegion
81
- from natural_pdf.analyzers.guides import Guides
82
-
83
- ElementCollection = None
84
83
 
85
84
  # Search options (if extras installed)
86
85
  try:
@@ -118,7 +117,6 @@ __all__ = [
118
117
  "PDFCollection",
119
118
  "Page",
120
119
  "Region",
121
- "ElementCollection",
122
120
  "Flow",
123
121
  "FlowRegion",
124
122
  "Guides",
@@ -127,6 +125,7 @@ __all__ = [
127
125
  "BaseSearchOptions",
128
126
  "configure_logging",
129
127
  "options",
128
+ "PageCollection",
130
129
  ]
131
130
 
132
131
  # Add QA components to __all__ if available
natural_pdf/analyzers/__init__.py CHANGED
@@ -8,23 +8,28 @@ from natural_pdf.analyzers.shape_detection_mixin import ShapeDetectionMixin
8
8
  from natural_pdf.analyzers.text_options import TextStyleOptions
9
9
  from natural_pdf.analyzers.text_structure import TextStyleAnalyzer
10
10
 
11
+
11
12
  # Lazy imports to avoid circular dependencies
12
13
  # These will be imported when actually accessed
13
14
  def __getattr__(name):
14
15
  if name == "LayoutAnalyzer":
15
16
  from natural_pdf.analyzers.layout.layout_analyzer import LayoutAnalyzer
17
+
16
18
  return LayoutAnalyzer
17
19
  elif name == "LayoutManager":
18
20
  from natural_pdf.analyzers.layout.layout_manager import LayoutManager
21
+
19
22
  return LayoutManager
20
23
  elif name == "LayoutOptions":
21
24
  from natural_pdf.analyzers.layout.layout_options import LayoutOptions
25
+
22
26
  return LayoutOptions
23
27
  raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
24
28
 
29
+
25
30
  __all__ = [
26
31
  "LayoutAnalyzer",
27
- "LayoutManager",
32
+ "LayoutManager",
28
33
  "LayoutOptions",
29
34
  "ShapeDetectionMixin",
30
35
  "TextStyleOptions",