natural-pdf 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
- natural_pdf/analyzers/guides.py +185 -9
- natural_pdf/core/element_manager.py +5 -0
- natural_pdf/core/page.py +40 -4
- natural_pdf/core/pdf.py +41 -2
- natural_pdf/elements/region.py +32 -13
- natural_pdf/tables/result.py +39 -6
- {natural_pdf-0.2.4.dist-info → natural_pdf-0.2.5.dist-info}/METADATA +1 -1
- {natural_pdf-0.2.4.dist-info → natural_pdf-0.2.5.dist-info}/RECORD +12 -12
- {natural_pdf-0.2.4.dist-info → natural_pdf-0.2.5.dist-info}/WHEEL +0 -0
- {natural_pdf-0.2.4.dist-info → natural_pdf-0.2.5.dist-info}/entry_points.txt +0 -0
- {natural_pdf-0.2.4.dist-info → natural_pdf-0.2.5.dist-info}/licenses/LICENSE +0 -0
- {natural_pdf-0.2.4.dist-info → natural_pdf-0.2.5.dist-info}/top_level.txt +0 -0
natural_pdf/analyzers/guides.py
CHANGED
@@ -143,7 +143,7 @@ class GuidesList(UserList):
 
     def from_content(
         self,
-        markers: Union[str, List[str], "ElementCollection", None],
+        markers: Union[str, List[str], "ElementCollection", Callable, None],
        obj: Optional[Union["Page", "Region", "FlowRegion"]] = None,
        align: Literal["left", "right", "center", "between"] = "left",
        outer: bool = True,
@@ -160,6 +160,7 @@ class GuidesList(UserList):
                 - str: single selector (e.g., 'text:contains("Name")') or literal text
                 - List[str]: list of selectors or literal text strings
                 - ElementCollection: collection of elements to extract text from
+                - Callable: function that takes a page and returns markers
                 - None: no markers
             obj: Page/Region/FlowRegion to search (uses parent's context if None)
             align: How to align guides relative to found elements
@@ -174,13 +175,22 @@ class GuidesList(UserList):
         if target_obj is None:
             raise ValueError("No object provided and no context available")
 
+        # Store callable markers for later evaluation
+        if callable(markers):
+            self._callable = markers
+            # For now, evaluate with the current target object to get initial guides
+            actual_markers = markers(target_obj)
+        else:
+            self._callable = None
+            actual_markers = markers
+
         # Check if parent is in flow mode
         if self._parent.is_flow_region:
             # Create guides across all constituent regions
             all_guides = []
             for region in self._parent.context.constituent_regions:
                 # Normalize markers for this region
-                marker_texts = _normalize_markers(markers, region)
+                marker_texts = _normalize_markers(actual_markers, region)
 
                 # Create guides for this region
                 region_guides = Guides.from_content(
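Because the callable is stored on the `GuidesList` (as `_callable`), marker positions can be recomputed for each page later; `extract_table_from_pages` below depends on exactly this. A minimal usage sketch, with an illustrative file name and selector (import paths inferred from the module layout in this diff):

```python
from natural_pdf import PDF
from natural_pdf.analyzers.guides import Guides

pdf = PDF("report.pdf")  # illustrative input
guides = Guides(pdf.pages[0])

# Static markers: resolved once against the page passed in
guides.vertical.from_content(["Name", "Amount", "Date"])

# Callable markers: stored in _callable, evaluated now for the initial
# guides and re-evaluated per page during multi-page extraction
guides.horizontal.from_content(lambda page: page.find_all("text:starts-with(NF-)"))
```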
@@ -263,7 +273,7 @@ class GuidesList(UserList):
 
         # Original single-region logic
         # Normalize markers to list of text strings
-        marker_texts = _normalize_markers(markers, target_obj)
+        marker_texts = _normalize_markers(actual_markers, target_obj)
 
         # Create guides for this axis
         new_guides = Guides.from_content(
@@ -1541,11 +1551,15 @@ class Guides:
         # Add outer guides if requested
         if outer and bounds:
             if axis == "vertical":
-                guides_coords.insert(0, bounds[0])  # x0
-                guides_coords.append(bounds[2])  # x1
+                if outer == True or outer == "first":
+                    guides_coords.insert(0, bounds[0])  # x0
+                if outer == True or outer == "last":
+                    guides_coords.append(bounds[2])  # x1
             else:
-                guides_coords.insert(0, bounds[1])  # y0
-                guides_coords.append(bounds[3])  # y1
+                if outer == True or outer == "first":
+                    guides_coords.insert(0, bounds[1])  # y0
+                if outer == True or outer == "last":
+                    guides_coords.append(bounds[3])  # y1
 
         # Remove duplicates and sort
         guides_coords = sorted(list(set(guides_coords)))
@@ -3302,7 +3316,7 @@ class Guides:
         markers: Union[str, List[str], "ElementCollection", None] = None,
         obj: Optional[Union["Page", "Region"]] = None,
         align: Literal["left", "right", "center", "between"] = "left",
-        outer: bool = True,
+        outer: Union[str, bool] = True,
         tolerance: float = 5,
         apply_exclusions: bool = True,
     ) -> "Guides":
@@ -3319,7 +3333,10 @@ class Guides:
             - None: no markers
             obj: Page or Region to search (uses self.context if None)
             align: How to align guides relative to found elements
-            outer: Whether to add outer boundary guides
+            outer: Whether to add outer boundary guides. Can be:
+                - bool: True/False to add/not add both
+                - "first": To add boundary before the first element
+                - "last": To add boundary before the last element
             tolerance: Tolerance for snapping to element edges
             apply_exclusions: Whether to apply exclusion zones when searching for text
 
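At the call site the widened `outer` type reads like this; a sketch with illustrative markers:

```python
# Both outer boundaries (previous behavior, still the default)
guides.vertical.from_content(["Name", "Amount"], outer=True)

# Only the trailing boundary: bounds[2] (x1) is appended, while the first
# guide sits exactly on the first marker
guides.vertical.from_content(["Name", "Amount"], outer="last")
```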
@@ -3457,6 +3474,7 @@ class Guides:
         cell_extraction_func: Optional[Callable[["Region"], Optional[str]]] = None,
         show_progress: bool = False,
         content_filter: Optional[Union[str, Callable[[str], bool], List[str]]] = None,
+        apply_exclusions: bool = True,
         *,
         multi_page: Literal["auto", True, False] = "auto",
     ) -> "TableResult":
@@ -3482,6 +3500,7 @@ class Guides:
             cell_extraction_func: Optional callable for custom cell text extraction
             show_progress: Controls progress bar for text method
             content_filter: Content filtering function or patterns
+            apply_exclusions: Whether to apply exclusion regions during text extraction (default: True)
             multi_page: Controls multi-region table creation for FlowRegions
 
         Returns:
@@ -3552,6 +3571,7 @@ class Guides:
             cell_extraction_func=cell_extraction_func,
             show_progress=show_progress,
             content_filter=content_filter,
+            apply_exclusions=apply_exclusions,
         )
 
         return table_result
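The new flag is simply forwarded to the region-level `extract_table` (see the region.py changes below), so honoring or ignoring exclusion zones becomes a per-call decision:

```python
# Default: text inside exclusion zones (headers, footers) is dropped
table = guides.extract_table()

# Opt out for a single call, e.g. when an excluded "footer" band actually
# holds the table's last row
table_raw = guides.extract_table(apply_exclusions=False)
df = table_raw.to_df()
```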
@@ -3577,6 +3597,162 @@ class Guides:
             except Exception as cleanup_err:
                 logger.warning(f"Failed to clean up temporary regions: {cleanup_err}")
 
+    def extract_table_from_pages(
+        self,
+        pages: Union["PageCollection", List["Page"]],
+        header: Union[str, List[str], None] = "first",
+        skip_repeating_headers: Optional[bool] = None,
+        method: Optional[str] = None,
+        table_settings: Optional[dict] = None,
+        use_ocr: bool = False,
+        ocr_config: Optional[dict] = None,
+        text_options: Optional[Dict] = None,
+        cell_extraction_func: Optional[Callable[["Region"], Optional[str]]] = None,
+        show_progress: bool = True,
+        content_filter: Optional[Union[str, Callable[[str], bool], List[str]]] = None,
+        apply_exclusions: bool = True,
+    ) -> "TableResult":
+        """
+        Extract tables from multiple pages using this guide pattern.
+
+        This method applies the guide to each page, extracts tables, and combines
+        them into a single TableResult. Dynamic guides (using lambdas) are evaluated
+        for each page.
+
+        Args:
+            pages: PageCollection or list of Pages to extract from
+            header: How to handle headers:
+                - "first": Use first row of first page as headers (default)
+                - "all": Expect headers on each page, use from first page
+                - None: No headers, use numeric indices
+                - List[str]: Custom column names
+            skip_repeating_headers: Whether to remove duplicate header rows.
+                Defaults to True when header is "first" or "all", False otherwise.
+            method: Table extraction method (passed to extract_table)
+            table_settings: Settings for pdfplumber table extraction
+            use_ocr: Whether to use OCR for text extraction
+            ocr_config: OCR configuration parameters
+            text_options: Dictionary of options for the 'text' method
+            cell_extraction_func: Optional callable for custom cell text extraction
+            show_progress: Show progress bar for multi-page extraction (default: True)
+            content_filter: Content filtering function or patterns
+            apply_exclusions: Whether to apply exclusion regions during extraction
+
+        Returns:
+            TableResult: Combined table data from all pages
+
+        Example:
+            ```python
+            # Create guide with static vertical, dynamic horizontal
+            guide = Guides(pages[0])
+            guide.vertical.from_content(columns, outer="last")
+            guide.horizontal.from_content(lambda p: p.find_all('text:starts-with(NF-)'))
+
+            # Extract from all pages
+            table_result = guide.extract_table_from_pages(pages, header=columns)
+            df = table_result.to_df()
+            ```
+        """
+        from natural_pdf.core.page_collection import PageCollection
+        from natural_pdf.tables.result import TableResult
+
+        # Convert to list if it's a PageCollection
+        if isinstance(pages, PageCollection):
+            page_list = list(pages)
+        else:
+            page_list = pages
+
+        if not page_list:
+            return TableResult([])
+
+        # Determine header handling
+        if skip_repeating_headers is None:
+            skip_repeating_headers = header in ["first", "all"] or isinstance(header, list)
+
+        all_rows = []
+        header_row = None
+
+        # Configure progress bar
+        iterator = page_list
+        if show_progress and len(page_list) > 1:
+            try:
+                from tqdm.auto import tqdm
+
+                iterator = tqdm(page_list, desc="Extracting tables from pages", unit="page")
+            except ImportError:
+                pass
+
+        for i, page in enumerate(iterator):
+            # Create a new Guides object for this page
+            page_guide = Guides(page)
+
+            # Copy vertical guides (usually static)
+            if hasattr(self.vertical, "_callable") and self.vertical._callable is not None:
+                # If vertical is dynamic (lambda), evaluate it
+                page_guide.vertical.from_content(self.vertical._callable(page))
+            else:
+                # Copy static vertical positions
+                page_guide.vertical.data = self.vertical.data.copy()
+
+            # Handle horizontal guides
+            if hasattr(self.horizontal, "_callable") and self.horizontal._callable is not None:
+                # If horizontal is dynamic (lambda), evaluate it
+                page_guide.horizontal.from_content(self.horizontal._callable(page))
+            else:
+                # Copy static horizontal positions
+                page_guide.horizontal.data = self.horizontal.data.copy()
+
+            # Extract table from this page
+            table_result = page_guide.extract_table(
+                method=method,
+                table_settings=table_settings,
+                use_ocr=use_ocr,
+                ocr_config=ocr_config,
+                text_options=text_options,
+                cell_extraction_func=cell_extraction_func,
+                show_progress=False,  # Don't show nested progress
+                content_filter=content_filter,
+                apply_exclusions=apply_exclusions,
+            )
+
+            # Convert to list of rows
+            rows = list(table_result)
+
+            # Handle headers based on strategy
+            if i == 0:  # First page
+                if header == "first" or header == "all":
+                    # Use first row as header
+                    if rows:
+                        header_row = rows[0]
+                        rows = rows[1:]  # Remove header from data
+                elif isinstance(header, list):
+                    # Custom headers provided
+                    header_row = header
+            else:  # Subsequent pages
+                if header == "all" and skip_repeating_headers and rows:
+                    # Expect and remove header row
+                    if rows and header_row and rows[0] == header_row:
+                        rows = rows[1:]
+                    elif rows:
+                        # Still remove first row if it looks like a header
+                        rows = rows[1:]
+
+            # Add rows to combined result
+            all_rows.extend(rows)
+
+        # Create final TableResult
+        if isinstance(header, list):
+            # Custom headers - prepend to data
+            final_result = TableResult(all_rows)
+        elif header_row is not None:
+            # Prepend discovered header
+            final_result = TableResult([header_row] + all_rows)
+        else:
+            # No headers
+            final_result = TableResult(all_rows)
+
+        return final_result
+
     def _get_flow_orientation(self) -> Literal["vertical", "horizontal", "unknown"]:
         """Determines if a FlowRegion's constituent parts are arranged vertically or horizontally."""
         if not self.is_flow_region or len(self.context.constituent_regions) < 2:
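The docstring example covers the dynamic-guide case; the header strategies deserve one more sketch (the `pdf` and `guide` objects are assumed set up as above):

```python
# header="first": row 0 of page 1 becomes the header; every row on later
# pages is treated as data
result = guide.extract_table_from_pages(pdf.pages, header="first")

# header="all": each page is expected to repeat the header row, and the
# repeat is dropped on pages 2..n (skip_repeating_headers defaults to True)
result = guide.extract_table_from_pages(pdf.pages, header="all")
df = result.to_df()
```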
natural_pdf/core/element_manager.py
CHANGED
@@ -939,6 +939,11 @@ class ElementManager:
         self.load_elements()
         return self._elements.get("chars", [])
 
+    def invalidate_cache(self):
+        """Invalidate the cached elements, forcing a reload on next access."""
+        self._elements = None
+        logger.debug(f"Page {self._page.number}: ElementManager cache invalidated")
+
     @property
     def words(self):
         """Get all word elements."""
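The only callers of `invalidate_cache` are the new hooks in page.py below; the contract is simply "drop `_elements`, reload on next access". A sketch of the intended effect, assuming `Page` proxies element access (e.g. `page.words`) to its `ElementManager` and that `page.region(...)` builds a rectangular exclusion region:

```python
page = pdf.pages[0]
_ = page.words  # populates the ElementManager cache

# add_exclusion() now invalidates the cache, so the next access reloads
# and re-filters elements instead of returning stale cached ones
page.add_exclusion(page.region(0, 0, page.width, 50), label="header")
filtered = page.words
```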
natural_pdf/core/page.py
CHANGED
@@ -494,6 +494,9 @@ class Page(
                     exc_info=False,
                 )
                 raise
+            # Invalidate ElementManager cache since exclusions affect element filtering
+            if hasattr(self, "_element_mgr") and self._element_mgr:
+                self._element_mgr.invalidate_cache()
             return self  # Completed processing for selector input
 
         # ElementCollection -----------------------------------------------
@@ -526,6 +529,9 @@ class Page(
                     exc_info=False,
                 )
                 raise
+            # Invalidate ElementManager cache since exclusions affect element filtering
+            if hasattr(self, "_element_mgr") and self._element_mgr:
+                self._element_mgr.invalidate_cache()
             return self  # Completed processing for ElementCollection input
 
         # ------------------------------------------------------------------
@@ -618,6 +624,9 @@ class Page(
                         f"Page {self.index}: Failed to convert list item to Region: {e}"
                     )
                     continue
+            # Invalidate ElementManager cache since exclusions affect element filtering
+            if hasattr(self, "_element_mgr") and self._element_mgr:
+                self._element_mgr.invalidate_cache()
             return self
         else:
             # Reject invalid types
@@ -629,6 +638,10 @@ class Page(
         if exclusion_data:
             self._exclusions.append(exclusion_data)
 
+        # Invalidate ElementManager cache since exclusions affect element filtering
+        if hasattr(self, "_element_mgr") and self._element_mgr:
+            self._element_mgr.invalidate_cache()
+
         return self
 
     def add_region(self, region: "Region", name: Optional[str] = None) -> "Page":
@@ -699,10 +712,26 @@ class Page(
         """
         regions = []
 
+        # Combine page-specific exclusions with PDF-level exclusions
+        all_exclusions = list(self._exclusions)  # Start with page-specific
+
+        # Add PDF-level exclusions if we have a parent PDF
+        if hasattr(self, "_parent") and self._parent and hasattr(self._parent, "_exclusions"):
+            for pdf_exclusion in self._parent._exclusions:
+                # Check if this exclusion is already in our list (avoid duplicates)
+                if pdf_exclusion not in all_exclusions:
+                    # Ensure consistent format (PDF exclusions might be 2-tuples, need to be 3-tuples)
+                    if len(pdf_exclusion) == 2:
+                        # Convert to 3-tuple format with default method
+                        pdf_exclusion = (pdf_exclusion[0], pdf_exclusion[1], "region")
+                    all_exclusions.append(pdf_exclusion)
+
         if debug:
-            print(
+            print(
+                f"\nPage {self.index}: Evaluating {len(all_exclusions)} exclusions ({len(self._exclusions)} page-specific, {len(all_exclusions) - len(self._exclusions)} from PDF)"
+            )
 
-        for i, exclusion_data in enumerate(self._exclusions):
+        for i, exclusion_data in enumerate(all_exclusions):
             # Handle both old format (2-tuple) and new format (3-tuple) for backward compatibility
             if len(exclusion_data) == 2:
                 # Old format: (exclusion_item, label)
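With this merge, PDF-level exclusions stored as 2-tuples are normalized to the 3-tuple form and evaluated alongside page-specific ones. A sketch; the `add_exclusion` signatures are inferred from the tuple handling visible above:

```python
# PDF-level: a callable evaluated per page, applied to every page
pdf.add_exclusion(lambda page: page.region(0, 0, page.width, 60), label="running-header")

# Page-level: a concrete region on a single page
page = pdf.pages[3]
page.add_exclusion(page.region(0, page.height - 40, page.width, page.height), label="footer")

# Both sets are now evaluated together by _get_exclusion_regions()
text = page.extract_text()
```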
@@ -1598,7 +1627,14 @@ class Page(
             return ""
 
         # 2. Apply element-based exclusions if enabled
-        if use_exclusions and self._exclusions:
+        # Check both page-level and PDF-level exclusions
+        has_exclusions = bool(self._exclusions) or (
+            hasattr(self, "_parent")
+            and self._parent
+            and hasattr(self._parent, "_exclusions")
+            and self._parent._exclusions
+        )
+        if use_exclusions and has_exclusions:
             # Filter word elements through _filter_elements_by_exclusions
             # This handles both element-based and region-based exclusions
             word_elements = self._filter_elements_by_exclusions(
@@ -1612,7 +1648,7 @@ class Page(
         # 3. Get region-based exclusions for spatial filtering
         apply_exclusions_flag = kwargs.get("use_exclusions", use_exclusions)
         exclusion_regions = []
-        if apply_exclusions_flag and self._exclusions:
+        if apply_exclusions_flag and has_exclusions:
             exclusion_regions = self._get_exclusion_regions(include_callable=True, debug=debug)
             if debug:
                 logger.debug(
natural_pdf/core/pdf.py
CHANGED
@@ -173,11 +173,26 @@ class _LazyPageList(Sequence):
         """Create and cache a page at the given index within this list."""
         cached = self._cache[index]
         if cached is None:
+            # Get the actual page index in the full PDF
+            actual_page_index = self._indices[index]
+
+            # First check if this page is already cached in the parent PDF's main page list
+            if (
+                hasattr(self._parent_pdf, "_pages")
+                and hasattr(self._parent_pdf._pages, "_cache")
+                and actual_page_index < len(self._parent_pdf._pages._cache)
+                and self._parent_pdf._pages._cache[actual_page_index] is not None
+            ):
+                # Reuse the already-cached page from the parent PDF
+                # This ensures we get any exclusions that were already applied
+                cached = self._parent_pdf._pages._cache[actual_page_index]
+                self._cache[index] = cached
+                return cached
+
             # Import here to avoid circular import problems
             from natural_pdf.core.page import Page
 
-            #
-            actual_page_index = self._indices[index]
+            # Create new page
             plumber_page = self._plumber_pdf.pages[actual_page_index]
             cached = Page(
                 plumber_page,
@@ -196,6 +211,30 @@ class _LazyPageList(Sequence):
                     except Exception as e:
                         logger.warning(f"Failed to apply exclusion to page {cached.number}: {e}")
 
+            # Check if the parent PDF already has a cached page with page-specific exclusions
+            if hasattr(self._parent_pdf, "_pages") and hasattr(self._parent_pdf._pages, "_cache"):
+                parent_cache = self._parent_pdf._pages._cache
+                if (
+                    actual_page_index < len(parent_cache)
+                    and parent_cache[actual_page_index] is not None
+                ):
+                    existing_page = parent_cache[actual_page_index]
+                    # Copy over any page-specific exclusions from the existing page
+                    # Only copy non-callable exclusions (regions/elements) to avoid duplicating PDF-level exclusions
+                    if hasattr(existing_page, "_exclusions") and existing_page._exclusions:
+                        for exclusion_data in existing_page._exclusions:
+                            exclusion_item = exclusion_data[0]
+                            # Skip callable exclusions as they're PDF-level and already applied above
+                            if not callable(exclusion_item):
+                                try:
+                                    cached.add_exclusion(
+                                        *exclusion_data[:2]
+                                    )  # exclusion_item and label
+                                except Exception as e:
+                                    logger.warning(
+                                        f"Failed to copy page-specific exclusion to page {cached.number}: {e}"
+                                    )
+
             # Apply any stored regions to the newly created page
             if hasattr(self._parent_pdf, "_regions"):
                 for region_data in self._parent_pdf._regions:
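The net effect of the two pdf.py changes is that sliced page lists share `Page` objects with the main list instead of silently rebuilding them. A sketch of the scenario being fixed (again assuming a `page.region(...)` helper):

```python
page = pdf.pages[0]
page.add_exclusion(page.region(0, 0, page.width, 50), label="header")

# Slicing creates a _LazyPageList; before 0.2.5 it could construct a fresh
# Page 0 without the exclusion above, now the cached page is reused
subset = pdf.pages[0:2]
assert subset[0].extract_text() == page.extract_text()
```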
natural_pdf/elements/region.py
CHANGED
@@ -1236,6 +1236,7 @@ class Region(
         content_filter: Optional[
             Union[str, Callable[[str], bool], List[str]]
         ] = None,  # NEW: Content filtering
+        apply_exclusions: bool = True,  # Whether to apply exclusion regions during extraction
     ) -> TableResult:  # Return type allows Optional[str] for cells
         """
         Extract a table from this region.
@@ -1260,6 +1261,8 @@ class Region(
                 - A callable that takes text and returns True to KEEP the character
                 - A list of regex patterns (characters matching ANY pattern are EXCLUDED)
                 Works with all extraction methods by filtering cell content.
+            apply_exclusions: Whether to apply exclusion regions during text extraction (default: True).
+                When True, text within excluded regions (e.g., headers/footers) will not be extracted.
 
         Returns:
             Table data as a list of rows, where each row is a list of cell values (str or None).
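At the region level this is the switch that all three extraction backends (`pdfplumber`, `tatr`, `text`) now respect, as the hunks below show. A sketch with illustrative coordinates:

```python
table_region = page.region(0, 100, page.width, 500)  # assumed bbox helper

rows = table_region.extract_table()  # exclusions honored (default)
rows_raw = table_region.extract_table(apply_exclusions=False)  # raw text kept
```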
@@ -1297,7 +1300,9 @@ class Region(
                 )
                 return TableResult(
                     self._extract_table_from_cells(
-                        cell_regions_in_table,
+                        cell_regions_in_table,
+                        content_filter=content_filter,
+                        apply_exclusions=apply_exclusions,
                     )
                 )
 
@@ -1381,16 +1386,22 @@ class Region(
         # Use the selected method
         if effective_method == "tatr":
             table_rows = self._extract_table_tatr(
-                use_ocr=use_ocr,
+                use_ocr=use_ocr,
+                ocr_config=ocr_config,
+                content_filter=content_filter,
+                apply_exclusions=apply_exclusions,
             )
         elif effective_method == "text":
             current_text_options = text_options.copy()
             current_text_options["cell_extraction_func"] = cell_extraction_func
             current_text_options["show_progress"] = show_progress
             current_text_options["content_filter"] = content_filter
+            current_text_options["apply_exclusions"] = apply_exclusions
             table_rows = self._extract_table_text(**current_text_options)
         elif effective_method == "pdfplumber":
-            table_rows = self._extract_table_plumber(
+            table_rows = self._extract_table_plumber(
+                table_settings, content_filter=content_filter, apply_exclusions=apply_exclusions
+            )
         else:
             raise ValueError(
                 f"Unknown table extraction method: '{method}'. Choose from 'tatr', 'pdfplumber', 'text', 'stream', 'lattice'."
@@ -1604,7 +1615,9 @@ class Region(
         # Return empty list if no tables found
         return []
 
-    def _extract_table_plumber(
+    def _extract_table_plumber(
+        self, table_settings: dict, content_filter=None, apply_exclusions=True
+    ) -> List[List[str]]:
         """
         Extract table using pdfplumber's table extraction.
         This method extracts the largest table within the region.
@@ -1646,7 +1659,7 @@ class Region(
         # -------------------------------------------------------------
         base_plumber_page = self.page._page
 
-        if getattr(self.page, "_exclusions", None):
+        if apply_exclusions and getattr(self.page, "_exclusions", None):
             exclusion_regions = self.page._get_exclusion_regions(include_callable=True)
 
             def _keep_char(obj):
@@ -1701,7 +1714,7 @@ class Region(
         return []
 
     def _extract_table_tatr(
-        self, use_ocr=False, ocr_config=None, content_filter=None
+        self, use_ocr=False, ocr_config=None, content_filter=None, apply_exclusions=True
     ) -> List[List[str]]:
         """
         Extract table using TATR structure detection.
@@ -1789,7 +1802,7 @@ class Region(
                     continue
 
             # Fallback to normal extraction
-            header_text = header.extract_text().strip()
+            header_text = header.extract_text(apply_exclusions=apply_exclusions).strip()
             if content_filter is not None:
                 header_text = self._apply_content_filter_to_text(header_text, content_filter)
             header_texts.append(header_text)
@@ -1824,7 +1837,7 @@ class Region(
                         continue
 
                 # Fallback to normal extraction
-                cell_text = cell_region.extract_text().strip()
+                cell_text = cell_region.extract_text(apply_exclusions=apply_exclusions).strip()
                 if content_filter is not None:
                     cell_text = self._apply_content_filter_to_text(cell_text, content_filter)
                 row_cells.append(cell_text)
@@ -1840,7 +1853,7 @@ class Region(
                     continue
 
             # Fallback to normal extraction
-            row_text = row.extract_text().strip()
+            row_text = row.extract_text(apply_exclusions=apply_exclusions).strip()
             if content_filter is not None:
                 row_text = self._apply_content_filter_to_text(row_text, content_filter)
             row_cells.append(row_text)
@@ -1866,6 +1879,8 @@ class Region(
         show_progress = text_options.pop("show_progress", False)
         # --- Get content_filter option --- #
         content_filter = text_options.pop("content_filter", None)
+        # --- Get apply_exclusions option --- #
+        apply_exclusions = text_options.pop("apply_exclusions", True)
 
         # Analyze structure first (or use cached results)
         if "text_table_structure" in self.analyses:
@@ -1946,7 +1961,9 @@ class Region(
                     cell_value = None
                 else:
                     cell_value = cell_region.extract_text(
-                        layout=False,
+                        layout=False,
+                        apply_exclusions=apply_exclusions,
+                        content_filter=content_filter,
                     ).strip()
 
             rounded_top = round(cell_data["top"] / coord_tolerance) * coord_tolerance
@@ -3397,7 +3414,7 @@ class Region(
     # ------------------------------------------------------------------
 
     def _extract_table_from_cells(
-        self, cell_regions: List["Region"], content_filter=None
+        self, cell_regions: List["Region"], content_filter=None, apply_exclusions=True
     ) -> List[List[Optional[str]]]:
         """Construct a table (list-of-lists) from table_cell regions.
 
@@ -3439,7 +3456,9 @@ class Region(
                 r_idx = int(cell.metadata.get("row_index"))
                 c_idx = int(cell.metadata.get("col_index"))
                 text_val = cell.extract_text(
-                    layout=False,
+                    layout=False,
+                    apply_exclusions=apply_exclusions,
+                    content_filter=content_filter,
                 ).strip()
                 table_grid[r_idx][c_idx] = text_val if text_val else None
             except Exception as _err:
@@ -3488,7 +3507,7 @@ class Region(
             col_idx = int(np.argmin([abs(cx - cc) for cc in col_centers]))
 
             text_val = cell.extract_text(
-                layout=False, apply_exclusions=
+                layout=False, apply_exclusions=apply_exclusions, content_filter=content_filter
             ).strip()
             table_grid[row_idx][col_idx] = text_val if text_val else None
 
natural_pdf/tables/result.py
CHANGED
@@ -41,7 +41,7 @@ class TableResult(Sequence):
 
     def to_df(
         self,
-        header: Union[str, int, List[int], None] = "first",
+        header: Union[str, int, List[int], List[str], None] = "first",
         index_col=None,
         skip_repeating_headers=None,
         keep_blank: bool = False,
@@ -51,8 +51,8 @@ class TableResult(Sequence):
 
         Parameters
         ----------
-        header : "first" | int | list[int] | None, default "first"
-            • "first" – use row 0 as column names.\n • int – use that row index.\n • list[int] – multi-row header.\n • None/False– no header.
+        header : "first" | int | list[int] | list[str] | None, default "first"
+            • "first" – use row 0 as column names.\n • int – use that row index.\n • list[int] – multi-row header.\n • list[str] – custom column names.\n • None/False– no header.
 
         Note: If the header row has a different number of columns than the
         body rows, the method will automatically fall back to header=None
@@ -84,7 +84,11 @@ class TableResult(Sequence):
 
         # Determine default for skip_repeating_headers based on header parameter
         if skip_repeating_headers is None:
-            skip_repeating_headers =
+            skip_repeating_headers = (
+                header is not None
+                and header is not False
+                and not (isinstance(header, (list, tuple)) and len(header) == 0)
+            )
 
         # Determine header rows and body rows
         body = rows
@@ -97,10 +101,31 @@ class TableResult(Sequence):
         elif isinstance(header, int):
             hdr = rows[header]
             body = rows[:header] + rows[header + 1 :]
-        elif isinstance(header, (list, tuple)):
+        elif isinstance(header, (list, tuple)) and all(isinstance(i, int) for i in header):
+            # List of integers - multi-row header
             hdr_rows = [rows[i] for i in header]
             body = [r for idx, r in enumerate(rows) if idx not in header]
             hdr = hdr_rows
+        elif (
+            isinstance(header, (list, tuple))
+            and len(header) > 0
+            and all(isinstance(i, str) for i in header)
+        ):
+            # List of strings - custom column names
+            hdr = list(header)
+            body = rows
+            # Validate column count matches
+            if body:
+                max_cols = max(len(row) for row in body)
+                if len(hdr) != max_cols:
+                    raise ValueError(
+                        f"Number of column names ({len(hdr)}) must match "
+                        f"number of columns in data ({max_cols})"
+                    )
+        elif isinstance(header, (list, tuple)) and len(header) == 0:
+            # Empty list behaves like None
+            hdr = None
+            body = rows
         else:
             raise ValueError("Invalid value for header parameter")
 
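A sketch of the new list-of-strings path through `to_df` (data values are illustrative):

```python
from natural_pdf.tables.result import TableResult

result = TableResult([["Alice", "3"], ["Bob", "7"]])

# Custom column names: no row is consumed as a header
df = result.to_df(header=["name", "count"])

# A mismatched name count raises instead of silently misaligning columns
try:
    result.to_df(header=["only_one_name"])
except ValueError as err:
    print(err)
```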
@@ -125,7 +150,12 @@ class TableResult(Sequence):
             pass
 
         # Check for header/body column count mismatch and fallback to no header
-        if
+        if (
+            hdr is not None
+            and body
+            and not (isinstance(header, (list, tuple)) and all(isinstance(i, str) for i in header))
+        ):
+            # Skip this check for custom string headers
             # Get the maximum number of columns from all body rows
             # This handles cases where some rows have different column counts
             max_cols = max(len(row) for row in body) if body else 0
@@ -144,6 +174,9 @@ class TableResult(Sequence):
             hdr = None
             body = self._rows  # Use all rows as body
 
+        # Handle empty list case - pandas needs None not empty list
+        if isinstance(hdr, list) and len(hdr) == 0:
+            hdr = None
         df = pd.DataFrame(body, columns=hdr)
 
         # Convert empty strings to NaN by default
{natural_pdf-0.2.4.dist-info → natural_pdf-0.2.5.dist-info}/RECORD
CHANGED
@@ -2,7 +2,7 @@ natural_pdf/__init__.py,sha256=N4pR0LbuPEnUYFZqbdVqc_FGKldgwPQc1wjJhYKTBBM,3417
 natural_pdf/cli.py,sha256=SkPwhhMM-GhLsj3O1n1Agxz4KOxcZ08sj8hVQSFJB5c,4064
 natural_pdf/text_mixin.py,sha256=eFCiHj6Okcw3aum4955BepcI2NPRalkf9UFFVTc_H30,4012
 natural_pdf/analyzers/__init__.py,sha256=3XGoNq3OgiVkZP7tOdeP5XVUl7fDgyztdA8DlOcMLXg,1138
-natural_pdf/analyzers/guides.py,sha256=
+natural_pdf/analyzers/guides.py,sha256=mLWPPEwywo_FbU3gSoegiRlzxYmkHEo2c4DLX9krH9k,157691
 natural_pdf/analyzers/shape_detection_mixin.py,sha256=mgpyJ4jIulz9l9HCqThabJIsLSrXh9BB2AmLxUoHmw0,62584
 natural_pdf/analyzers/text_options.py,sha256=qEkDaYWla0rIM_gszEOsu52q7C_dAfV81P2HLJZM2sw,3333
 natural_pdf/analyzers/text_structure.py,sha256=3WWusi-BI0krUnJxB05DD6XmKj5qRNvQBqH7zOQGm1M,28451
@@ -25,12 +25,12 @@ natural_pdf/classification/mixin.py,sha256=CXygXXhe_qx1563SmIjiu4uSnZkxCkuRR4fGv
 natural_pdf/classification/results.py,sha256=5ha77CxK0GYwkBMJbvUBZkBjsL5GpOveIZDK9nO4j8I,3239
 natural_pdf/collections/mixins.py,sha256=Se2C5AcpP9B5E0d0pIrey6-f_P32tAXTK4M7666MNj0,5688
 natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
-natural_pdf/core/element_manager.py,sha256=
+natural_pdf/core/element_manager.py,sha256=KPuKM7SstfErTkRnGq4vrgE0Tv8iazN13Jp7yAXGKso,55575
 natural_pdf/core/highlighting_service.py,sha256=7on8nErhi50CEH2L4XzGIZ6tIqZtMzmmFlp-2lmwnYE,68856
-natural_pdf/core/page.py,sha256=
+natural_pdf/core/page.py,sha256=Q3hBvB9KFB8doeXY7YVQt3G1ULdBDfA-0BQD6YPN4oo,144640
 natural_pdf/core/page_collection.py,sha256=hEeXs_fzB73XZ8ZkHz2kIuSgBYcVYydvGMMdGuB1rvw,52486
 natural_pdf/core/page_groupby.py,sha256=550ME6kd-h-2u75oUIIIqTYsmh8VvdQO1nXXioL8J6A,7378
-natural_pdf/core/pdf.py,sha256=
+natural_pdf/core/pdf.py,sha256=VslSn00So6157XfiYbrB9URpx5VlWyshQOt7upi9us4,104248
 natural_pdf/core/pdf_collection.py,sha256=s3ogu4CEHrHMTRqQMJUKJZ-9Ii8b_B9dWbVLTFj0s7g,34992
 natural_pdf/core/render_spec.py,sha256=rLicaS9EPyojpJcjy2Lzn5DLWQwjrFyDJyRo7jbjdGU,14505
 natural_pdf/describe/__init__.py,sha256=kIV7ORmWWB1SAur7nK2aAwR-wHqSedhKfUsaUl4hG0A,586
@@ -44,7 +44,7 @@ natural_pdf/elements/element_collection.py,sha256=slCUnOT04sNOTjSGgmhjcCKKPVPtdD
 natural_pdf/elements/image.py,sha256=zu-P2Y8fRoEXf6IeZU0EYRWsgZ6I_a5vy1FA3VXTGkQ,1424
 natural_pdf/elements/line.py,sha256=TFn7KXjPT_jUQyQyabU0F7XYU4dC-qadwodJMZF4DCU,3844
 natural_pdf/elements/rect.py,sha256=0lNkVkPkvbRbrFED856RXoUcTcDkeeOIs5xldKGAQT8,3324
-natural_pdf/elements/region.py,sha256=
+natural_pdf/elements/region.py,sha256=_NNBewHlyUHvA4g9kApilP6it0cn2IRlcGG4r993oUI,156660
 natural_pdf/elements/text.py,sha256=829uSJv9E-8cC6T6iR_Va7Xtv54pJoyRN78fq4NN1d4,20687
 natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
 natural_pdf/exporters/__init__.py,sha256=QffoARekR6WzXEd05oxOytly4qPdBizuIF-SUkeFpig,643
@@ -87,7 +87,7 @@ natural_pdf/search/searchable_mixin.py,sha256=hqQ_AuID5eTGRCtKYdFLZ1zF35y73uk3x1
 natural_pdf/selectors/__init__.py,sha256=oZGeqSv53EqmIZOhcnawuaGGlRg1h79vArXuZCWKm4A,123
 natural_pdf/selectors/parser.py,sha256=pw0M8ICKPMOzZPzWpLsQMG_lnl8PewGIdIG3ciukabk,38877
 natural_pdf/tables/__init__.py,sha256=sCvCGbGsL6BiqlNxAYfVv003bIDLI11FmjHhaWfcU6w,104
-natural_pdf/tables/result.py,sha256
+natural_pdf/tables/result.py,sha256=-8ctA-jCJYSHtlfAoqTvhUwO5zSP2BQxxetAjqEsNyg,8665
 natural_pdf/templates/__init__.py,sha256=jYBxzfi73vew0f6yhIh1MlRxw4F_TVN2hKQR0YXOFe0,20
 natural_pdf/utils/__init__.py,sha256=s3M8FggaK1P3EBYn6R_-HgSDjNc9C73gyKe1hihtNWg,43
 natural_pdf/utils/bidi_mirror.py,sha256=jJEES0xDrMfo5Me8kHMxHv4COS51PitnYi2EvKv3HCE,1151
@@ -106,7 +106,7 @@ natural_pdf/vision/results.py,sha256=F2zXG3MVZIpOUvPkJHotOq6-9rFz68BaO_8pnSndlOs
 natural_pdf/vision/similarity.py,sha256=YH8legN-t9uf1b_XULi4JLNDaRfPNKQwU1FZ4Qu08jY,11740
 natural_pdf/widgets/__init__.py,sha256=QTVaUmsw__FCweFYZebwPssQxxUFUMd0wpm_cUbGZJY,181
 natural_pdf/widgets/viewer.py,sha256=KW3JogdR2TMg2ECUMYp8hwd060hfg8EsYBWxb5IEzBY,24942
-natural_pdf-0.2.4.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
+natural_pdf-0.2.5.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
 optimization/memory_comparison.py,sha256=0i_foFSRmppj-fY069qjwH36s_zkx-1L2ASAAlepWzA,6541
 optimization/pdf_analyzer.py,sha256=HjrmTgu2qchxPeDckc5kjgxppGwd40UESrYS9Myj7pY,19352
 optimization/performance_analysis.py,sha256=JBXnR9hc7Ix7YCnt3EJPSpsyqIUgKsc7GEffQ_TDCBk,13033
@@ -123,8 +123,8 @@ tools/bad_pdf_eval/llm_enrich.py,sha256=mCh4KGi1HmIkzGjj5rrHz1Osd7sEX1IZ_FW08H1t
 tools/bad_pdf_eval/llm_enrich_with_retry.py,sha256=XUtPF1hUvqd3frDXT0wDTXoonuAivhjM5vgFdZ-tm0A,9373
 tools/bad_pdf_eval/reporter.py,sha256=e1g__mkSB4q02p3mGWOwMhvFs7F2HJosNBxup0-LkyU,400
 tools/bad_pdf_eval/utils.py,sha256=hR95XQ7qf7Cu6BdyX0L7ggGVx-ah5sK0jHWblTJUUic,4896
-natural_pdf-0.2.
-natural_pdf-0.2.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-natural_pdf-0.2.4.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
-natural_pdf-0.2.4.dist-info/top_level.txt,sha256=80t0F2ZeX4vN4Ke5iTflcOk_PN_0USn33ha3X6X86Ik,36
-natural_pdf-0.2.4.dist-info/RECORD,,
+natural_pdf-0.2.5.dist-info/METADATA,sha256=H9nhjh1zRBmz2vUTe_j6FT-Zvn1sgoWT0nyoZG5GTYg,6959
+natural_pdf-0.2.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+natural_pdf-0.2.5.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
+natural_pdf-0.2.5.dist-info/top_level.txt,sha256=80t0F2ZeX4vN4Ke5iTflcOk_PN_0USn33ha3X6X86Ik,36
+natural_pdf-0.2.5.dist-info/RECORD,,
{natural_pdf-0.2.4.dist-info → natural_pdf-0.2.5.dist-info}/WHEEL
File without changes
{natural_pdf-0.2.4.dist-info → natural_pdf-0.2.5.dist-info}/entry_points.txt
File without changes
{natural_pdf-0.2.4.dist-info → natural_pdf-0.2.5.dist-info}/licenses/LICENSE
File without changes
{natural_pdf-0.2.4.dist-info → natural_pdf-0.2.5.dist-info}/top_level.txt
File without changes