natural-pdf 0.1.38__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. natural_pdf/__init__.py +11 -6
  2. natural_pdf/analyzers/__init__.py +6 -1
  3. natural_pdf/analyzers/guides.py +354 -258
  4. natural_pdf/analyzers/layout/layout_analyzer.py +2 -3
  5. natural_pdf/analyzers/layout/layout_manager.py +18 -4
  6. natural_pdf/analyzers/layout/paddle.py +11 -0
  7. natural_pdf/analyzers/layout/surya.py +2 -3
  8. natural_pdf/analyzers/shape_detection_mixin.py +25 -34
  9. natural_pdf/analyzers/text_structure.py +2 -2
  10. natural_pdf/classification/manager.py +1 -1
  11. natural_pdf/collections/mixins.py +3 -2
  12. natural_pdf/core/highlighting_service.py +743 -32
  13. natural_pdf/core/page.py +252 -399
  14. natural_pdf/core/page_collection.py +1249 -0
  15. natural_pdf/core/pdf.py +231 -89
  16. natural_pdf/{collections → core}/pdf_collection.py +18 -11
  17. natural_pdf/core/render_spec.py +335 -0
  18. natural_pdf/describe/base.py +1 -1
  19. natural_pdf/elements/__init__.py +1 -0
  20. natural_pdf/elements/base.py +108 -83
  21. natural_pdf/elements/{collections.py → element_collection.py} +575 -1372
  22. natural_pdf/elements/line.py +0 -1
  23. natural_pdf/elements/rect.py +0 -1
  24. natural_pdf/elements/region.py +405 -280
  25. natural_pdf/elements/text.py +9 -7
  26. natural_pdf/exporters/base.py +2 -2
  27. natural_pdf/exporters/original_pdf.py +1 -1
  28. natural_pdf/exporters/paddleocr.py +2 -4
  29. natural_pdf/exporters/searchable_pdf.py +3 -2
  30. natural_pdf/extraction/mixin.py +1 -3
  31. natural_pdf/flows/collections.py +1 -69
  32. natural_pdf/flows/element.py +25 -0
  33. natural_pdf/flows/flow.py +1658 -19
  34. natural_pdf/flows/region.py +757 -263
  35. natural_pdf/ocr/ocr_options.py +0 -2
  36. natural_pdf/ocr/utils.py +2 -1
  37. natural_pdf/qa/document_qa.py +21 -5
  38. natural_pdf/search/search_service_protocol.py +1 -1
  39. natural_pdf/selectors/parser.py +35 -2
  40. natural_pdf/tables/result.py +35 -1
  41. natural_pdf/text_mixin.py +101 -0
  42. natural_pdf/utils/debug.py +2 -1
  43. natural_pdf/utils/highlighting.py +1 -0
  44. natural_pdf/utils/layout.py +2 -2
  45. natural_pdf/utils/packaging.py +4 -3
  46. natural_pdf/utils/text_extraction.py +15 -12
  47. natural_pdf/utils/visualization.py +385 -0
  48. {natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/METADATA +7 -3
  49. {natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/RECORD +55 -52
  50. optimization/memory_comparison.py +1 -1
  51. optimization/pdf_analyzer.py +2 -2
  52. {natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/WHEEL +0 -0
  53. {natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/entry_points.txt +0 -0
  54. {natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/licenses/LICENSE +0 -0
  55. {natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/top_level.txt +0 -0
natural_pdf/__init__.py CHANGED
@@ -70,14 +70,16 @@ options = Options()
70
70
  # Version
71
71
  __version__ = "0.1.1"
72
72
 
73
- # Core imports
74
- from natural_pdf.collections.pdf_collection import PDFCollection
73
+ from natural_pdf.analyzers.guides import Guides
75
74
  from natural_pdf.core.page import Page
75
+ from natural_pdf.core.page_collection import PageCollection
76
76
  from natural_pdf.core.pdf import PDF
77
- from natural_pdf.elements.collections import ElementCollection
78
- from natural_pdf.elements.region import Region
79
77
 
80
- ElementCollection = None
78
+ # Core imports
79
+ from natural_pdf.core.pdf_collection import PDFCollection
80
+ from natural_pdf.elements.region import Region
81
+ from natural_pdf.flows.flow import Flow
82
+ from natural_pdf.flows.region import FlowRegion
81
83
 
82
84
  # Search options (if extras installed)
83
85
  try:
@@ -115,12 +117,15 @@ __all__ = [
115
117
  "PDFCollection",
116
118
  "Page",
117
119
  "Region",
118
- "ElementCollection",
120
+ "Flow",
121
+ "FlowRegion",
122
+ "Guides",
119
123
  "TextSearchOptions",
120
124
  "MultiModalSearchOptions",
121
125
  "BaseSearchOptions",
122
126
  "configure_logging",
123
127
  "options",
128
+ "PageCollection",
124
129
  ]
125
130
 
126
131
  # Add QA components to __all__ if available
natural_pdf/analyzers/__init__.py CHANGED
@@ -8,23 +8,28 @@ from natural_pdf.analyzers.shape_detection_mixin import ShapeDetectionMixin
8
8
  from natural_pdf.analyzers.text_options import TextStyleOptions
9
9
  from natural_pdf.analyzers.text_structure import TextStyleAnalyzer
10
10
 
11
+
11
12
  # Lazy imports to avoid circular dependencies
12
13
  # These will be imported when actually accessed
13
14
  def __getattr__(name):
14
15
  if name == "LayoutAnalyzer":
15
16
  from natural_pdf.analyzers.layout.layout_analyzer import LayoutAnalyzer
17
+
16
18
  return LayoutAnalyzer
17
19
  elif name == "LayoutManager":
18
20
  from natural_pdf.analyzers.layout.layout_manager import LayoutManager
21
+
19
22
  return LayoutManager
20
23
  elif name == "LayoutOptions":
21
24
  from natural_pdf.analyzers.layout.layout_options import LayoutOptions
25
+
22
26
  return LayoutOptions
23
27
  raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
24
28
 
29
+
25
30
  __all__ = [
26
31
  "LayoutAnalyzer",
27
- "LayoutManager",
32
+ "LayoutManager",
28
33
  "LayoutOptions",
29
34
  "ShapeDetectionMixin",
30
35
  "TextStyleOptions",