natural-pdf 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/core/page.py +114 -18
- natural_pdf/core/page_collection.py +41 -19
- natural_pdf/core/pdf.py +14 -14
- natural_pdf/elements/element_collection.py +27 -8
- {natural_pdf-0.2.6.dist-info → natural_pdf-0.2.8.dist-info}/METADATA +1 -1
- {natural_pdf-0.2.6.dist-info → natural_pdf-0.2.8.dist-info}/RECORD +10 -10
- {natural_pdf-0.2.6.dist-info → natural_pdf-0.2.8.dist-info}/WHEEL +0 -0
- {natural_pdf-0.2.6.dist-info → natural_pdf-0.2.8.dist-info}/entry_points.txt +0 -0
- {natural_pdf-0.2.6.dist-info → natural_pdf-0.2.8.dist-info}/licenses/LICENSE +0 -0
- {natural_pdf-0.2.6.dist-info → natural_pdf-0.2.8.dist-info}/top_level.txt +0 -0
natural_pdf/core/page.py
CHANGED
@@ -717,14 +717,23 @@ class Page(
|
|
717
717
|
|
718
718
|
# Add PDF-level exclusions if we have a parent PDF
|
719
719
|
if hasattr(self, "_parent") and self._parent and hasattr(self._parent, "_exclusions"):
|
720
|
+
# Get existing labels to check for duplicates
|
721
|
+
existing_labels = set()
|
722
|
+
for exc in all_exclusions:
|
723
|
+
if len(exc) >= 2 and exc[1]: # Has a label
|
724
|
+
existing_labels.add(exc[1])
|
725
|
+
|
720
726
|
for pdf_exclusion in self._parent._exclusions:
|
721
|
-
# Check if this exclusion is already in our list (avoid duplicates)
|
722
|
-
if pdf_exclusion
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
727
|
+
# Check if this exclusion label is already in our list (avoid duplicates)
|
728
|
+
label = pdf_exclusion[1] if len(pdf_exclusion) >= 2 else None
|
729
|
+
if label and label in existing_labels:
|
730
|
+
continue # Skip this exclusion as it's already been applied
|
731
|
+
|
732
|
+
# Ensure consistent format (PDF exclusions might be 2-tuples, need to be 3-tuples)
|
733
|
+
if len(pdf_exclusion) == 2:
|
734
|
+
# Convert to 3-tuple format with default method
|
735
|
+
pdf_exclusion = (pdf_exclusion[0], pdf_exclusion[1], "region")
|
736
|
+
all_exclusions.append(pdf_exclusion)
|
728
737
|
|
729
738
|
if debug:
|
730
739
|
print(
|
@@ -829,6 +838,36 @@ class Page(
|
|
829
838
|
regions.append(exclusion_item) # Label is already on the Region object
|
830
839
|
if debug:
|
831
840
|
print(f" - Added direct region '{label}': {exclusion_item}")
|
841
|
+
|
842
|
+
# Process string selectors (from PDF-level exclusions)
|
843
|
+
elif isinstance(exclusion_item, str):
|
844
|
+
selector_str = exclusion_item
|
845
|
+
matching_elements = self.find_all(selector_str, apply_exclusions=False)
|
846
|
+
|
847
|
+
if debug:
|
848
|
+
print(
|
849
|
+
f" - Evaluating selector '{exclusion_label}': found {len(matching_elements)} elements"
|
850
|
+
)
|
851
|
+
|
852
|
+
if method == "region":
|
853
|
+
# Convert each matching element to a region
|
854
|
+
for el in matching_elements:
|
855
|
+
try:
|
856
|
+
bbox_coords = (
|
857
|
+
float(el.x0),
|
858
|
+
float(el.top),
|
859
|
+
float(el.x1),
|
860
|
+
float(el.bottom),
|
861
|
+
)
|
862
|
+
region = Region(self, bbox_coords, label=label)
|
863
|
+
regions.append(region)
|
864
|
+
if debug:
|
865
|
+
print(f" ✓ Added region from selector match: {bbox_coords}")
|
866
|
+
except Exception as e:
|
867
|
+
if debug:
|
868
|
+
print(f" ✗ Failed to create region from element: {e}")
|
869
|
+
# If method is "element", it will be handled in _filter_elements_by_exclusions
|
870
|
+
|
832
871
|
# Element-based exclusions are not converted to regions here
|
833
872
|
# They will be handled separately in _filter_elements_by_exclusions
|
834
873
|
|
@@ -852,7 +891,16 @@ class Page(
|
|
852
891
|
Returns:
|
853
892
|
A new list containing only the elements not excluded.
|
854
893
|
"""
|
855
|
-
|
894
|
+
# Check both page-level and PDF-level exclusions
|
895
|
+
has_page_exclusions = bool(self._exclusions)
|
896
|
+
has_pdf_exclusions = (
|
897
|
+
hasattr(self, "_parent")
|
898
|
+
and self._parent
|
899
|
+
and hasattr(self._parent, "_exclusions")
|
900
|
+
and bool(self._parent._exclusions)
|
901
|
+
)
|
902
|
+
|
903
|
+
if not has_page_exclusions and not has_pdf_exclusions:
|
856
904
|
if debug_exclusions:
|
857
905
|
print(
|
858
906
|
f"Page {self.index}: No exclusions defined, returning all {len(elements)} elements."
|
@@ -865,9 +913,15 @@ class Page(
|
|
865
913
|
)
|
866
914
|
|
867
915
|
# Collect element-based exclusions
|
868
|
-
|
916
|
+
# Store element bboxes for comparison instead of object ids
|
917
|
+
excluded_element_bboxes = set() # Use set for O(1) lookup
|
918
|
+
|
919
|
+
# Process both page-level and PDF-level exclusions
|
920
|
+
all_exclusions = list(self._exclusions) if has_page_exclusions else []
|
921
|
+
if has_pdf_exclusions:
|
922
|
+
all_exclusions.extend(self._parent._exclusions)
|
869
923
|
|
870
|
-
for exclusion_data in
|
924
|
+
for exclusion_data in all_exclusions:
|
871
925
|
# Handle both old format (2-tuple) and new format (3-tuple)
|
872
926
|
if len(exclusion_data) == 2:
|
873
927
|
exclusion_item, label = exclusion_data
|
@@ -883,16 +937,31 @@ class Page(
|
|
883
937
|
if isinstance(exclusion_item, Region):
|
884
938
|
continue
|
885
939
|
|
940
|
+
# Handle string selectors for element-based exclusions
|
941
|
+
if isinstance(exclusion_item, str) and method == "element":
|
942
|
+
selector_str = exclusion_item
|
943
|
+
matching_elements = self.find_all(selector_str, apply_exclusions=False)
|
944
|
+
for el in matching_elements:
|
945
|
+
if hasattr(el, "bbox"):
|
946
|
+
bbox = tuple(el.bbox)
|
947
|
+
excluded_element_bboxes.add(bbox)
|
948
|
+
if debug_exclusions:
|
949
|
+
print(
|
950
|
+
f" - Added element exclusion from selector '{selector_str}': {bbox}"
|
951
|
+
)
|
952
|
+
|
886
953
|
# Handle element-based exclusions
|
887
|
-
|
888
|
-
|
954
|
+
elif method == "element" and hasattr(exclusion_item, "bbox"):
|
955
|
+
# Store bbox tuple for comparison
|
956
|
+
bbox = tuple(exclusion_item.bbox)
|
957
|
+
excluded_element_bboxes.add(bbox)
|
889
958
|
if debug_exclusions:
|
890
|
-
print(f" - Added element exclusion: {exclusion_item}")
|
959
|
+
print(f" - Added element exclusion with bbox {bbox}: {exclusion_item}")
|
891
960
|
|
892
961
|
if debug_exclusions:
|
893
962
|
print(
|
894
963
|
f"Page {self.index}: Applying {len(exclusion_regions)} region exclusions "
|
895
|
-
f"and {len(
|
964
|
+
f"and {len(excluded_element_bboxes)} element exclusions to {len(elements)} elements."
|
896
965
|
)
|
897
966
|
|
898
967
|
filtered_elements = []
|
@@ -903,7 +972,7 @@ class Page(
|
|
903
972
|
exclude = False
|
904
973
|
|
905
974
|
# Check element-based exclusions first (faster)
|
906
|
-
if
|
975
|
+
if hasattr(element, "bbox") and tuple(element.bbox) in excluded_element_bboxes:
|
907
976
|
exclude = True
|
908
977
|
element_excluded_count += 1
|
909
978
|
if debug_exclusions:
|
@@ -2487,10 +2556,23 @@ class Page(
|
|
2487
2556
|
return self
|
2488
2557
|
|
2489
2558
|
def get_section_between(
|
2490
|
-
self,
|
2559
|
+
self,
|
2560
|
+
start_element=None,
|
2561
|
+
end_element=None,
|
2562
|
+
include_boundaries="both",
|
2563
|
+
orientation="vertical",
|
2491
2564
|
) -> Optional["Region"]: # Return Optional
|
2492
2565
|
"""
|
2493
2566
|
Get a section between two elements on this page.
|
2567
|
+
|
2568
|
+
Args:
|
2569
|
+
start_element: Element marking the start of the section
|
2570
|
+
end_element: Element marking the end of the section
|
2571
|
+
include_boundaries: How to include boundary elements: 'start', 'end', 'both', or 'none'
|
2572
|
+
orientation: 'vertical' (default) or 'horizontal' - determines section direction
|
2573
|
+
|
2574
|
+
Returns:
|
2575
|
+
Region representing the section
|
2494
2576
|
"""
|
2495
2577
|
# Create a full-page region to operate within
|
2496
2578
|
page_region = self.create_region(0, 0, self.width, self.height)
|
@@ -2501,6 +2583,7 @@ class Page(
|
|
2501
2583
|
start_element=start_element,
|
2502
2584
|
end_element=end_element,
|
2503
2585
|
include_boundaries=include_boundaries,
|
2586
|
+
orientation=orientation,
|
2504
2587
|
)
|
2505
2588
|
except Exception as e:
|
2506
2589
|
logger.error(
|
@@ -2575,10 +2658,23 @@ class Page(
|
|
2575
2658
|
if include_boundaries not in valid_inclusions:
|
2576
2659
|
raise ValueError(f"include_boundaries must be one of {valid_inclusions}")
|
2577
2660
|
|
2578
|
-
if not start_elements:
|
2579
|
-
# Return an empty ElementCollection if no
|
2661
|
+
if not start_elements and not end_elements:
|
2662
|
+
# Return an empty ElementCollection if no boundary elements at all
|
2580
2663
|
return ElementCollection([])
|
2581
2664
|
|
2665
|
+
# If we only have end elements, create implicit start elements
|
2666
|
+
if not start_elements and end_elements:
|
2667
|
+
# Delegate to PageCollection implementation for consistency
|
2668
|
+
from natural_pdf.core.page_collection import PageCollection
|
2669
|
+
|
2670
|
+
pages = PageCollection([self])
|
2671
|
+
return pages.get_sections(
|
2672
|
+
start_elements=start_elements,
|
2673
|
+
end_elements=end_elements,
|
2674
|
+
include_boundaries=include_boundaries,
|
2675
|
+
orientation=orientation,
|
2676
|
+
)
|
2677
|
+
|
2582
2678
|
# Combine start and end elements with their type
|
2583
2679
|
all_boundaries = []
|
2584
2680
|
for el in start_elements:
|
@@ -537,10 +537,14 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
|
|
537
537
|
first_page = self.pages[0]
|
538
538
|
first_start = Region(first_page, (0, 0, first_page.width, 1))
|
539
539
|
first_start.is_implicit_start = True
|
540
|
+
# Don't mark this as created from any end element, so it can pair with any end
|
540
541
|
start_elements.append(first_start)
|
541
542
|
|
542
543
|
# For each end element (except the last), add an implicit start after it
|
543
|
-
|
544
|
+
# Sort by page, then top, then bottom (for elements with same top), then x0
|
545
|
+
sorted_end_elements = sorted(
|
546
|
+
end_elements, key=lambda e: (e.page.index, e.top, e.bottom, e.x0)
|
547
|
+
)
|
544
548
|
for i, end_elem in enumerate(sorted_end_elements[:-1]): # Exclude last end element
|
545
549
|
# Create implicit start element right after this end element
|
546
550
|
implicit_start = Region(
|
@@ -838,29 +842,47 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
|
|
838
842
|
# Create a section from current_start to just before this boundary
|
839
843
|
start_element = current_start["element"]
|
840
844
|
|
841
|
-
#
|
845
|
+
# Create section from current start to just before this new start
|
842
846
|
if start_element.page == boundary["element"].page:
|
843
|
-
|
844
|
-
|
845
|
-
|
847
|
+
from natural_pdf.elements.region import Region
|
848
|
+
|
849
|
+
next_start = boundary["element"]
|
850
|
+
|
851
|
+
# Create section based on orientation
|
846
852
|
if orientation == "vertical":
|
847
|
-
|
853
|
+
# Determine vertical bounds
|
854
|
+
if include_boundaries in ["start", "both"]:
|
855
|
+
top = start_element.top
|
856
|
+
else:
|
857
|
+
top = start_element.bottom
|
858
|
+
|
859
|
+
# The section ends just before the next start
|
860
|
+
bottom = next_start.top
|
861
|
+
|
862
|
+
# Create the section with full page width
|
863
|
+
if top < bottom:
|
864
|
+
section = Region(
|
865
|
+
start_element.page, (0, top, start_element.page.width, bottom)
|
866
|
+
)
|
867
|
+
section.start_element = start_element
|
868
|
+
sections.append(section)
|
848
869
|
else: # horizontal
|
849
|
-
|
870
|
+
# Determine horizontal bounds
|
871
|
+
if include_boundaries in ["start", "both"]:
|
872
|
+
left = start_element.x0
|
873
|
+
else:
|
874
|
+
left = start_element.x1
|
850
875
|
|
851
|
-
|
852
|
-
|
853
|
-
page_elements.index(boundary["element"]) - 1
|
854
|
-
if boundary["element"] in page_elements
|
855
|
-
else -1
|
856
|
-
)
|
857
|
-
end_element = page_elements[end_idx] if end_idx >= 0 else None
|
876
|
+
# The section ends just before the next start
|
877
|
+
right = next_start.x0
|
858
878
|
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
863
|
-
|
879
|
+
# Create the section with full page height
|
880
|
+
if left < right:
|
881
|
+
section = Region(
|
882
|
+
start_element.page, (left, 0, right, start_element.page.height)
|
883
|
+
)
|
884
|
+
section.start_element = start_element
|
885
|
+
sections.append(section)
|
864
886
|
else:
|
865
887
|
# Cross-page section - create from current_start to the end of its page
|
866
888
|
from natural_pdf.elements.region import Region
|
natural_pdf/core/pdf.py
CHANGED
@@ -252,6 +252,16 @@ class _LazyPageList(Sequence):
|
|
252
252
|
logger.warning(f"Failed to apply region to page {cached.number}: {e}")
|
253
253
|
|
254
254
|
self._cache[index] = cached
|
255
|
+
|
256
|
+
# Also cache in the parent PDF's main page list if this is a slice
|
257
|
+
if (
|
258
|
+
hasattr(self._parent_pdf, "_pages")
|
259
|
+
and hasattr(self._parent_pdf._pages, "_cache")
|
260
|
+
and actual_page_index < len(self._parent_pdf._pages._cache)
|
261
|
+
and self._parent_pdf._pages._cache[actual_page_index] is None
|
262
|
+
):
|
263
|
+
self._parent_pdf._pages._cache[actual_page_index] = cached
|
264
|
+
|
255
265
|
return cached
|
256
266
|
|
257
267
|
# Sequence protocol ---------------------------------------------------
|
@@ -720,26 +730,16 @@ class PDF(
|
|
720
730
|
# Store for bookkeeping and lazy application
|
721
731
|
self._exclusions.append((exclusion_func, label))
|
722
732
|
|
723
|
-
#
|
724
|
-
|
725
|
-
if self._pages._cache[i] is not None: # Only apply to existing pages
|
726
|
-
try:
|
727
|
-
self._pages._cache[i].add_exclusion(exclusion_func, label=label)
|
728
|
-
except Exception as e:
|
729
|
-
logger.warning(f"Failed to apply exclusion to existing page {i}: {e}")
|
733
|
+
# Don't modify already-cached pages - they will get PDF-level exclusions
|
734
|
+
# dynamically through _get_exclusion_regions()
|
730
735
|
return self
|
731
736
|
|
732
737
|
# Fallback to original callable / Region behaviour ------------------
|
733
738
|
exclusion_data = (exclusion_func, label)
|
734
739
|
self._exclusions.append(exclusion_data)
|
735
740
|
|
736
|
-
#
|
737
|
-
|
738
|
-
if self._pages._cache[i] is not None: # Only apply to existing pages
|
739
|
-
try:
|
740
|
-
self._pages._cache[i].add_exclusion(exclusion_func, label=label)
|
741
|
-
except Exception as e:
|
742
|
-
logger.warning(f"Failed to apply exclusion to existing page {i}: {e}")
|
741
|
+
# Don't modify already-cached pages - they will get PDF-level exclusions
|
742
|
+
# dynamically through _get_exclusion_regions()
|
743
743
|
|
744
744
|
return self
|
745
745
|
|
@@ -621,6 +621,7 @@ class ElementCollection(
|
|
621
621
|
|
622
622
|
def extract_text(
|
623
623
|
self,
|
624
|
+
separator: str = " ",
|
624
625
|
preserve_whitespace: bool = True,
|
625
626
|
use_exclusions: bool = True,
|
626
627
|
strip: Optional[bool] = None,
|
@@ -632,6 +633,9 @@ class ElementCollection(
|
|
632
633
|
pdfplumber's layout engine if layout=True is specified.
|
633
634
|
|
634
635
|
Args:
|
636
|
+
separator: String to insert between text from different elements when
|
637
|
+
using simple joining (layout=False). Default is a single space.
|
638
|
+
Ignored when layout=True as the layout engine handles spacing.
|
635
639
|
preserve_whitespace: Deprecated. Use layout=False for simple joining.
|
636
640
|
use_exclusions: Deprecated. Exclusions should be applied *before* creating
|
637
641
|
the collection or by filtering the collection itself.
|
@@ -668,7 +672,7 @@ class ElementCollection(
|
|
668
672
|
logger.warning(
|
669
673
|
"ElementCollection.extract_text: No character dictionaries found in TextElements."
|
670
674
|
)
|
671
|
-
return
|
675
|
+
return separator.join(
|
672
676
|
getattr(el, "text", "") for el in text_elements
|
673
677
|
) # Fallback to simple join of word text
|
674
678
|
|
@@ -733,18 +737,33 @@ class ElementCollection(
|
|
733
737
|
all_char_dicts.sort(
|
734
738
|
key=lambda c: (c.get("page_number", 0), c.get("top", 0), c.get("x0", 0))
|
735
739
|
)
|
736
|
-
result =
|
740
|
+
result = separator.join(c.get("text", "") for c in all_char_dicts)
|
737
741
|
|
738
742
|
else:
|
739
743
|
# Default: Simple join without layout
|
740
744
|
logger.debug("ElementCollection.extract_text: Using simple join (layout=False).")
|
741
|
-
|
742
|
-
|
743
|
-
|
745
|
+
|
746
|
+
# Instead of joining all characters individually, we need to:
|
747
|
+
# 1. Extract text from each element
|
748
|
+
# 2. Join the element texts with the separator
|
749
|
+
|
750
|
+
# Sort elements by document order (page, top, x0)
|
751
|
+
sorted_elements = sorted(
|
752
|
+
text_elements,
|
753
|
+
key=lambda el: (
|
754
|
+
el.page.index if hasattr(el, "page") else 0,
|
755
|
+
el.top if hasattr(el, "top") else 0,
|
756
|
+
el.x0 if hasattr(el, "x0") else 0,
|
757
|
+
),
|
744
758
|
)
|
745
|
-
|
746
|
-
|
747
|
-
|
759
|
+
|
760
|
+
# Extract text from each element
|
761
|
+
element_texts = []
|
762
|
+
for el in sorted_elements:
|
763
|
+
if hasattr(el, "text") and el.text:
|
764
|
+
element_texts.append(el.text)
|
765
|
+
|
766
|
+
result = separator.join(element_texts)
|
748
767
|
|
749
768
|
# Determine final strip flag – same rule as global helper unless caller overrides
|
750
769
|
strip_text = strip if strip is not None else (not use_layout)
|
@@ -27,10 +27,10 @@ natural_pdf/collections/mixins.py,sha256=Se2C5AcpP9B5E0d0pIrey6-f_P32tAXTK4M7666
|
|
27
27
|
natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
|
28
28
|
natural_pdf/core/element_manager.py,sha256=KPuKM7SstfErTkRnGq4vrgE0Tv8iazN13Jp7yAXGKso,55575
|
29
29
|
natural_pdf/core/highlighting_service.py,sha256=7on8nErhi50CEH2L4XzGIZ6tIqZtMzmmFlp-2lmwnYE,68856
|
30
|
-
natural_pdf/core/page.py,sha256=
|
31
|
-
natural_pdf/core/page_collection.py,sha256=
|
30
|
+
natural_pdf/core/page.py,sha256=XmXii652iM-JVKgzpbKQ8f59U0TvDLD5iAfdtx92gis,152675
|
31
|
+
natural_pdf/core/page_collection.py,sha256=IjdFq9q0D0P6ZKWInf0H25rLzxfMb7RsUXucogkhNkU,63169
|
32
32
|
natural_pdf/core/page_groupby.py,sha256=V2e_RNlHaasUzYm2h2vNJI7_aV_fl3_pg7kU3F2j0z8,8218
|
33
|
-
natural_pdf/core/pdf.py,sha256=
|
33
|
+
natural_pdf/core/pdf.py,sha256=ovdeu9TRPnVYyMltD7QpcdcFYBLZFXh3LlfC5ifj6RY,104227
|
34
34
|
natural_pdf/core/pdf_collection.py,sha256=s3ogu4CEHrHMTRqQMJUKJZ-9Ii8b_B9dWbVLTFj0s7g,34992
|
35
35
|
natural_pdf/core/render_spec.py,sha256=y9QkMiIvWaKiEBlV0TjyldADIEUY3YfWLQXxStHu1S4,15480
|
36
36
|
natural_pdf/describe/__init__.py,sha256=kIV7ORmWWB1SAur7nK2aAwR-wHqSedhKfUsaUl4hG0A,586
|
@@ -40,7 +40,7 @@ natural_pdf/describe/mixin.py,sha256=rkX14aGrSz7Jvxx8Rbxv3eSfbO-_29DipwpstrV2pDQ
|
|
40
40
|
natural_pdf/describe/summary.py,sha256=cfT4ZQkeatCDAOwWPwhtEVXisNgk6E57fAXAnoRysSU,7645
|
41
41
|
natural_pdf/elements/__init__.py,sha256=ICNikmLeIEuSYypz-KnkBn8xR1hR7rge4hsa1KLkyWY,42
|
42
42
|
natural_pdf/elements/base.py,sha256=92ukTtRCQFsa5KvKflChCt4mt0ZGS4ecGYCQTNMO4zU,58907
|
43
|
-
natural_pdf/elements/element_collection.py,sha256
|
43
|
+
natural_pdf/elements/element_collection.py,sha256=-piFQGiDPiqmnl-Cpoi3PGPmGe4AYvpl0IqaJGxBsBc,129405
|
44
44
|
natural_pdf/elements/image.py,sha256=zu-P2Y8fRoEXf6IeZU0EYRWsgZ6I_a5vy1FA3VXTGkQ,1424
|
45
45
|
natural_pdf/elements/line.py,sha256=TFn7KXjPT_jUQyQyabU0F7XYU4dC-qadwodJMZF4DCU,3844
|
46
46
|
natural_pdf/elements/rect.py,sha256=0lNkVkPkvbRbrFED856RXoUcTcDkeeOIs5xldKGAQT8,3324
|
@@ -107,7 +107,7 @@ natural_pdf/vision/results.py,sha256=F2zXG3MVZIpOUvPkJHotOq6-9rFz68BaO_8pnSndlOs
|
|
107
107
|
natural_pdf/vision/similarity.py,sha256=YH8legN-t9uf1b_XULi4JLNDaRfPNKQwU1FZ4Qu08jY,11740
|
108
108
|
natural_pdf/widgets/__init__.py,sha256=QTVaUmsw__FCweFYZebwPssQxxUFUMd0wpm_cUbGZJY,181
|
109
109
|
natural_pdf/widgets/viewer.py,sha256=KW3JogdR2TMg2ECUMYp8hwd060hfg8EsYBWxb5IEzBY,24942
|
110
|
-
natural_pdf-0.2.
|
110
|
+
natural_pdf-0.2.8.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
|
111
111
|
optimization/memory_comparison.py,sha256=0i_foFSRmppj-fY069qjwH36s_zkx-1L2ASAAlepWzA,6541
|
112
112
|
optimization/pdf_analyzer.py,sha256=HjrmTgu2qchxPeDckc5kjgxppGwd40UESrYS9Myj7pY,19352
|
113
113
|
optimization/performance_analysis.py,sha256=JBXnR9hc7Ix7YCnt3EJPSpsyqIUgKsc7GEffQ_TDCBk,13033
|
@@ -124,8 +124,8 @@ tools/bad_pdf_eval/llm_enrich.py,sha256=mCh4KGi1HmIkzGjj5rrHz1Osd7sEX1IZ_FW08H1t
|
|
124
124
|
tools/bad_pdf_eval/llm_enrich_with_retry.py,sha256=XUtPF1hUvqd3frDXT0wDTXoonuAivhjM5vgFdZ-tm0A,9373
|
125
125
|
tools/bad_pdf_eval/reporter.py,sha256=e1g__mkSB4q02p3mGWOwMhvFs7F2HJosNBxup0-LkyU,400
|
126
126
|
tools/bad_pdf_eval/utils.py,sha256=hR95XQ7qf7Cu6BdyX0L7ggGVx-ah5sK0jHWblTJUUic,4896
|
127
|
-
natural_pdf-0.2.
|
128
|
-
natural_pdf-0.2.
|
129
|
-
natural_pdf-0.2.
|
130
|
-
natural_pdf-0.2.
|
131
|
-
natural_pdf-0.2.
|
127
|
+
natural_pdf-0.2.8.dist-info/METADATA,sha256=tuWXV-mY9zU0qsVsXhrrp3aGBfSxlklUxS_Dlllqmp4,6959
|
128
|
+
natural_pdf-0.2.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
129
|
+
natural_pdf-0.2.8.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
|
130
|
+
natural_pdf-0.2.8.dist-info/top_level.txt,sha256=80t0F2ZeX4vN4Ke5iTflcOk_PN_0USn33ha3X6X86Ik,36
|
131
|
+
natural_pdf-0.2.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|