natural-pdf 0.1.24__py3-none-any.whl → 0.1.26.dev0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
natural_pdf/core/page.py CHANGED
@@ -1576,8 +1576,12 @@ class Page(ClassificationMixin, ExtractionMixin, ShapeDetectionMixin, DescribeMi
1576
1576
  render_ocr: Whether to render OCR text on highlights.
1577
1577
  resolution: Resolution in DPI for base page image (default: scale * 72).
1578
1578
  include_highlights: Whether to render highlights.
1579
- exclusions: If 'mask', excluded regions will be whited out on the image.
1580
- (default: None).
1579
+ exclusions: Accepts one of the following:
1580
+ • None – no masking (default)
1581
+ • "mask" – mask using solid white (back-compat)
1582
+ • CSS/HTML colour string (e.g. "red", "#ff0000", "#ff000080")
1583
+ • Tuple of RGB or RGBA values (ints 0-255 or floats 0-1)
1584
+ All excluded regions are filled with this colour.
1581
1585
  **kwargs: Additional parameters for pdfplumber.to_image.
1582
1586
 
1583
1587
  Returns:
@@ -1690,7 +1694,52 @@ class Page(ClassificationMixin, ExtractionMixin, ShapeDetectionMixin, DescribeMi
1690
1694
  # --- Apply exclusion masking if requested ---
1691
1695
  # This modifies 'rendered_image_component'
1692
1696
  image_after_masking = rendered_image_component # Start with the rendered image
1693
- if exclusions == "mask" and self._exclusions:
1697
+
1698
+ # Determine if masking is requested and establish the fill colour
1699
+ mask_requested = exclusions is not None and self._exclusions
1700
+ mask_color: Union[str, Tuple[int, int, int, int]] = "white" # default
1701
+
1702
+ if mask_requested:
1703
+ if exclusions != "mask":
1704
+ # Attempt to parse custom colour input
1705
+ try:
1706
+ if isinstance(exclusions, tuple):
1707
+ # Handle RGB/RGBA tuples with ints 0-255 or floats 0-1
1708
+ processed = []
1709
+ all_float = all(isinstance(c, float) for c in exclusions)
1710
+ for i, c in enumerate(exclusions):
1711
+ if isinstance(c, float):
1712
+ val = int(c * 255) if all_float or i == 3 else int(c)
1713
+ else:
1714
+ val = int(c)
1715
+ processed.append(max(0, min(255, val)))
1716
+ if len(processed) == 3:
1717
+ processed.append(255) # add full alpha
1718
+ mask_color = tuple(processed) # type: ignore[assignment]
1719
+ elif isinstance(exclusions, str):
1720
+ # Try using the optional 'colour' library for rich parsing
1721
+ try:
1722
+ from colour import Color # type: ignore
1723
+
1724
+ color_obj = Color(exclusions)
1725
+ mask_color = (
1726
+ int(color_obj.red * 255),
1727
+ int(color_obj.green * 255),
1728
+ int(color_obj.blue * 255),
1729
+ 255,
1730
+ )
1731
+ except Exception:
1732
+ # Fallback: if parsing fails, treat as plain string accepted by PIL
1733
+ mask_color = exclusions # e.g. "red"
1734
+ else:
1735
+ logger.warning(
1736
+ f"Unsupported exclusions colour spec: {exclusions!r}. Using white."
1737
+ )
1738
+ except Exception as colour_parse_err: # pragma: no cover
1739
+ logger.warning(
1740
+ f"Failed to parse exclusions colour {exclusions!r}: {colour_parse_err}. Using white."
1741
+ )
1742
+
1694
1743
  try:
1695
1744
  # Ensure image is mutable (RGB or RGBA)
1696
1745
  if image_after_masking.mode not in ("RGB", "RGBA"):
@@ -1701,17 +1750,23 @@ class Page(ClassificationMixin, ExtractionMixin, ShapeDetectionMixin, DescribeMi
1701
1750
  )
1702
1751
  if exclusion_regions:
1703
1752
  draw = ImageDraw.Draw(image_after_masking)
1704
- # Calculate the scaling factor used for the image
1753
+ # Scaling factor for converting PDF pts to image px
1705
1754
  img_scale = render_resolution / 72.0
1706
1755
 
1756
+ # Determine fill colour compatible with current mode
1757
+ def _mode_compatible(colour):
1758
+ if isinstance(colour, tuple) and image_after_masking.mode != "RGBA":
1759
+ return colour[:3] # drop alpha for RGB images
1760
+ return colour
1761
+
1762
+ fill_colour = _mode_compatible(mask_color)
1763
+
1707
1764
  for region in exclusion_regions:
1708
- # Convert PDF points (x0, top, x1, bottom) to image pixels
1709
1765
  img_x0 = region.x0 * img_scale
1710
1766
  img_top = region.top * img_scale
1711
1767
  img_x1 = region.x1 * img_scale
1712
1768
  img_bottom = region.bottom * img_scale
1713
1769
 
1714
- # Draw a white rectangle over the excluded area
1715
1770
  img_coords = (
1716
1771
  max(0, img_x0),
1717
1772
  max(0, img_top),
@@ -1719,7 +1774,7 @@ class Page(ClassificationMixin, ExtractionMixin, ShapeDetectionMixin, DescribeMi
1719
1774
  min(image_after_masking.height, img_bottom),
1720
1775
  )
1721
1776
  if img_coords[0] < img_coords[2] and img_coords[1] < img_coords[3]:
1722
- draw.rectangle(img_coords, fill="white")
1777
+ draw.rectangle(img_coords, fill=fill_colour)
1723
1778
  else: # pragma: no cover
1724
1779
  logger.warning(
1725
1780
  f"Skipping invalid exclusion rect for masking: {img_coords}"
@@ -1994,6 +2049,10 @@ class Page(ClassificationMixin, ExtractionMixin, ShapeDetectionMixin, DescribeMi
1994
2049
  )
1995
2050
  return ElementCollection([]) # Return empty collection
1996
2051
 
2052
+ # Clear existing detected regions if 'replace' is specified
2053
+ if existing == "replace":
2054
+ self.clear_detected_layout_regions()
2055
+
1997
2056
  # The analyzer's analyze_layout method already adds regions to the page
1998
2057
  # and its element manager. We just need to retrieve them.
1999
2058
  analyzer.analyze_layout(
@@ -56,8 +56,8 @@ class ElementSummary:
56
56
  section_title = name.replace('_', ' ').title()
57
57
 
58
58
  if isinstance(data, dict):
59
- lines = [f"**{section_title}**:"]
60
- lines.extend(self._format_dict(data, indent=" "))
59
+ lines = [f"**{section_title}**:", ""]
60
+ lines.extend(self._format_dict(data, indent=""))
61
61
  elif isinstance(data, list):
62
62
  lines = [f"**{section_title}**: {', '.join(str(item) for item in data)}"]
63
63
  else:
@@ -94,6 +94,14 @@ class LineElement(Element):
94
94
  # Vertical if x-change is within tolerance and y-change is significant
95
95
  return dx <= tolerance and dy > tolerance
96
96
 
97
+ @property
98
+ def orientation(self) -> str:
99
+ """Get the orientation of the line ('horizontal', 'vertical', or 'diagonal')."""
100
+ if self.is_horizontal:
101
+ return "horizontal"
102
+ elif self.is_vertical:
103
+ return "vertical"
104
+
97
105
  def text_above(self, distance: float = 5, **kwargs) -> Any:
98
106
  """
99
107
  Get text elements above this line.
@@ -142,7 +150,4 @@ class LineElement(Element):
142
150
 
143
151
  def __repr__(self) -> str:
144
152
  """String representation of the line element."""
145
- line_type = (
146
- "horizontal" if self.is_horizontal else "vertical" if self.is_vertical else "diagonal"
147
- )
148
- return f"<LineElement type={line_type} width={self.width:.1f} bbox={self.bbox}>"
153
+ return f"<LineElement type={self.orientation} width={self.width:.1f} bbox={self.bbox}>"
@@ -1924,18 +1924,54 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
1924
1924
  f"Region {self.bbox}: Removing existing OCR elements before applying new OCR."
1925
1925
  )
1926
1926
 
1927
- # Remove existing OCR word elements strictly inside this region
1928
- ocr_selector = "text[source=ocr]"
1929
- ocr_elements = self.find_all(ocr_selector, apply_exclusions=False)
1930
- if ocr_elements:
1931
- removed_count = ocr_elements.remove()
1932
- logger.info(
1933
- f"Region {self.bbox}: Removed {removed_count} existing OCR word elements in region before re-applying OCR."
1934
- )
1935
- else:
1936
- logger.info(
1937
- f"Region {self.bbox}: No existing OCR word elements found within region to remove."
1938
- )
1927
+ # --- Robust removal: iterate through all OCR elements on the page and
1928
+ # remove those that overlap this region. This avoids reliance on
1929
+ # identity‐based look-ups that can break if the ElementManager
1930
+ # rebuilt its internal lists.
1931
+
1932
+ removed_count = 0
1933
+
1934
+ # Helper to remove a single element safely
1935
+ def _safe_remove(elem):
1936
+ nonlocal removed_count
1937
+ success = False
1938
+ if hasattr(elem, "page") and hasattr(elem.page, "_element_mgr"):
1939
+ etype = getattr(elem, "object_type", "word")
1940
+ if etype == "word":
1941
+ etype_key = "words"
1942
+ elif etype == "char":
1943
+ etype_key = "chars"
1944
+ else:
1945
+ etype_key = etype + "s" if not etype.endswith("s") else etype
1946
+ try:
1947
+ success = elem.page._element_mgr.remove_element(elem, etype_key)
1948
+ except Exception:
1949
+ success = False
1950
+ if success:
1951
+ removed_count += 1
1952
+
1953
+ # Remove OCR WORD elements overlapping region
1954
+ for word in list(self.page._element_mgr.words):
1955
+ if getattr(word, "source", None) == "ocr" and self.intersects(word):
1956
+ _safe_remove(word)
1957
+
1958
+ # Remove OCR CHAR dicts overlapping region
1959
+ for char in list(self.page._element_mgr.chars):
1960
+ # char can be dict or TextElement; normalise
1961
+ char_src = char.get("source") if isinstance(char, dict) else getattr(char, "source", None)
1962
+ if char_src == "ocr":
1963
+ # Rough bbox for dicts
1964
+ if isinstance(char, dict):
1965
+ cx0, ctop, cx1, cbottom = char.get("x0", 0), char.get("top", 0), char.get("x1", 0), char.get("bottom", 0)
1966
+ else:
1967
+ cx0, ctop, cx1, cbottom = char.x0, char.top, char.x1, char.bottom
1968
+ # Quick overlap check
1969
+ if not (cx1 < self.x0 or cx0 > self.x1 or cbottom < self.top or ctop > self.bottom):
1970
+ _safe_remove(char)
1971
+
1972
+ logger.info(
1973
+ f"Region {self.bbox}: Removed {removed_count} existing OCR elements (words & chars) before re-applying OCR."
1974
+ )
1939
1975
 
1940
1976
  ocr_mgr = self.page._parent._ocr_manager
1941
1977
 
@@ -43,9 +43,58 @@ class TextElement(Element):
43
43
 
44
44
  @text.setter
45
45
  def text(self, value: str):
46
- """Set the text content."""
46
+ """Set the text content and synchronise underlying char dictionaries (if any)."""
47
+ # Update the primary text value stored on the object itself
47
48
  self._obj["text"] = value
48
49
 
50
+ # --- Keep _char_dicts in sync so downstream utilities (e.g. extract_text)
51
+ # that rely on the raw character dictionaries see the corrected text.
52
+ # For OCR-generated words we usually have a single representative char
53
+ # dict; for native words there may be one per character.
54
+ # ---------------------------------------------------------------------
55
+ try:
56
+ if hasattr(self, "_char_dicts") and isinstance(self._char_dicts, list):
57
+ if not self._char_dicts:
58
+ return # Nothing to update
59
+
60
+ if len(self._char_dicts) == 1:
61
+ # Simple case – a single char dict represents the whole text
62
+ self._char_dicts[0]["text"] = value
63
+ else:
64
+ # Update character-by-character. If new value is shorter than
65
+ # existing char dicts, truncate remaining dicts by setting
66
+ # their text to empty string; if longer, extend by repeating
67
+ # the last char dict geometry (best-effort fallback).
68
+ for idx, char_dict in enumerate(self._char_dicts):
69
+ if idx < len(value):
70
+ char_dict["text"] = value[idx]
71
+ else:
72
+ # Clear extra characters from old text
73
+ char_dict["text"] = ""
74
+
75
+ # If new text is longer, append additional char dicts based
76
+ # on the last available geometry. This is an approximation
77
+ # but ensures text length consistency for downstream joins.
78
+ if len(value) > len(self._char_dicts):
79
+ last_dict = self._char_dicts[-1]
80
+ for extra_idx in range(len(self._char_dicts), len(value)):
81
+ new_dict = last_dict.copy()
82
+ new_dict["text"] = value[extra_idx]
83
+ # Advance x0/x1 roughly by average char width if available
84
+ char_width = last_dict.get("adv") or (
85
+ last_dict.get("width", 0) / max(len(self.text), 1)
86
+ )
87
+ if isinstance(char_width, (int, float)) and char_width > 0:
88
+ shift = char_width * (extra_idx - len(self._char_dicts) + 1)
89
+ new_dict["x0"] = last_dict.get("x0", 0) + shift
90
+ new_dict["x1"] = last_dict.get("x1", 0) + shift
91
+ self._char_dicts.append(new_dict)
92
+ except Exception as sync_err: # pragma: no cover
93
+ # Keep failures silent but logged; better to have outdated chars than crash.
94
+ import logging
95
+ logger = logging.getLogger(__name__)
96
+ logger.debug(f"TextElement: Failed to sync _char_dicts after text update: {sync_err}")
97
+
49
98
  @property
50
99
  def source(self) -> str:
51
100
  """Get the source of this text element (pdf or ocr)."""
@@ -2,6 +2,7 @@ import json
2
2
  import logging
3
3
  import os
4
4
  import tempfile
5
+ import warnings
5
6
  from typing import Any, Dict, List, Optional, Tuple, Union
6
7
 
7
8
  import numpy as np
@@ -310,13 +311,39 @@ class DocumentQA:
310
311
  QAResult instance with answer details
311
312
  """
312
313
  # Ensure we have text elements on the page
313
- if not page.find_all("text"):
314
- # Apply OCR if no text is available
315
- logger.info(f"No text elements found on page {page.index}, applying OCR")
316
- page.apply_ocr()
314
+ elements = page.find_all("text")
315
+ if not elements:
316
+ # Warn that no text was found and recommend OCR
317
+ warnings.warn(
318
+ f"No text elements found on page {page.index}. "
319
+ "Consider applying OCR first using page.apply_ocr() to extract text from images.",
320
+ UserWarning
321
+ )
322
+
323
+ # Return appropriate "not found" result(s)
324
+ if isinstance(question, (list, tuple)):
325
+ return [
326
+ QAResult(
327
+ question=q,
328
+ answer="",
329
+ confidence=0.0,
330
+ start=-1,
331
+ end=-1,
332
+ found=False,
333
+ )
334
+ for q in question
335
+ ]
336
+ else:
337
+ return QAResult(
338
+ question=question,
339
+ answer="",
340
+ confidence=0.0,
341
+ start=-1,
342
+ end=-1,
343
+ found=False,
344
+ )
317
345
 
318
346
  # Extract word boxes
319
- elements = page.find_all("text")
320
347
  word_boxes = self._get_word_boxes_from_elements(elements, offset_x=0, offset_y=0)
321
348
 
322
349
  # Generate a high-resolution image of the page
@@ -393,10 +420,37 @@ class DocumentQA:
393
420
  # Get all text elements within the region
394
421
  elements = region.find_all("text")
395
422
 
396
- # Apply OCR if needed
423
+ # Check if we have text elements
397
424
  if not elements:
398
- logger.info(f"No text elements found in region, applying OCR")
399
- elements = region.apply_ocr()
425
+ # Warn that no text was found and recommend OCR
426
+ warnings.warn(
427
+ f"No text elements found in region on page {region.page.index}. "
428
+ "Consider applying OCR first using region.apply_ocr() to extract text from images.",
429
+ UserWarning
430
+ )
431
+
432
+ # Return appropriate "not found" result(s)
433
+ if isinstance(question, (list, tuple)):
434
+ return [
435
+ QAResult(
436
+ question=q,
437
+ answer="",
438
+ confidence=0.0,
439
+ start=-1,
440
+ end=-1,
441
+ found=False,
442
+ )
443
+ for q in question
444
+ ]
445
+ else:
446
+ return QAResult(
447
+ question=question,
448
+ answer="",
449
+ confidence=0.0,
450
+ start=-1,
451
+ end=-1,
452
+ found=False,
453
+ )
400
454
 
401
455
  # Extract word boxes adjusted for the cropped region
402
456
  x0, top = int(region.x0), int(region.top)
@@ -36,7 +36,7 @@ def create_correction_task_package(
36
36
  output_zip_path: str,
37
37
  overwrite: bool = False,
38
38
  suggest=None,
39
- resolution: int = 150,
39
+ resolution: int = 300,
40
40
  ) -> None:
41
41
  """
42
42
  Creates a zip package containing data for an OCR correction task.
@@ -160,8 +160,22 @@ def create_correction_task_package(
160
160
 
161
161
  # 3. Prepare region data for manifest
162
162
  page_regions_data = []
163
- # Calculate scaling factor from PDF coordinates (72 DPI) to image pixels
164
- coord_scale_factor = resolution / 72.0
163
+ # Calculate scaling factor *from PDF points* to *actual image pixels*.
164
+ # We prefer using the rendered image dimensions rather than the nominal
165
+ # resolution value, because the image might have been resized (e.g. via
166
+ # global `natural_pdf.options.image.width`). This guarantees that the
167
+ # bounding boxes we write to the manifest always align with the exact
168
+ # pixel grid of the exported image.
169
+
170
+ try:
171
+ scale_x = img.width / float(page.width) if page.width else 1.0
172
+ scale_y = img.height / float(page.height) if page.height else 1.0
173
+ except Exception as e:
174
+ logger.warning(
175
+ f"Could not compute per-axis scale factors for page {page.number}: {e}. "
176
+ "Falling back to resolution-based scaling."
177
+ )
178
+ scale_x = scale_y = resolution / 72.0
165
179
 
166
180
  i = -1
167
181
  for elem in tqdm(ocr_elements):
@@ -176,12 +190,12 @@ def create_correction_task_package(
176
190
  continue
177
191
  region_id = f"r_{page.index}_{i}" # ID unique within page
178
192
 
179
- # Scale coordinates to match the 300 DPI image
193
+ # Scale coordinates to match the **actual** image dimensions.
180
194
  scaled_bbox = [
181
- elem.x0 * coord_scale_factor,
182
- elem.top * coord_scale_factor,
183
- elem.x1 * coord_scale_factor,
184
- elem.bottom * coord_scale_factor,
195
+ elem.x0 * scale_x,
196
+ elem.top * scale_y,
197
+ elem.x1 * scale_x,
198
+ elem.bottom * scale_y,
185
199
  ]
186
200
 
187
201
  corrected = elem.text
@@ -191,7 +205,7 @@ def create_correction_task_package(
191
205
 
192
206
  page_regions_data.append(
193
207
  {
194
- "resolution": resolution,
208
+ "resolution": scale_x * 72.0,
195
209
  "id": region_id,
196
210
  "bbox": scaled_bbox,
197
211
  "ocr_text": elem.text,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.1.24
3
+ Version: 0.1.26.dev0
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -26,20 +26,20 @@ natural_pdf/collections/pdf_collection.py,sha256=HLlyakM--23ZOeHDPucoM6Tw3yUyMXm
26
26
  natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
27
27
  natural_pdf/core/element_manager.py,sha256=_UdXu51sLi6STzc8Pj4k8R721G3yJixXDLuRHn3hmr8,25731
28
28
  natural_pdf/core/highlighting_service.py,sha256=DKoaxiiuQsWgtf6wSroMAIcFiqJOOF7dXhciYdQKdCw,38223
29
- natural_pdf/core/page.py,sha256=TOtpUp5lRhDj32wv3yvRaS8kxPX6R9904OCC6uHFi84,119512
29
+ natural_pdf/core/page.py,sha256=GqYfYiVkuL1M_GoPTcLL0yWFXISN38BUCdQIKyF6vJ0,122721
30
30
  natural_pdf/core/pdf.py,sha256=qsSW4RxOJRmCnweLPMs0NhzkRfiAVdghTgnh4D_wuO4,74295
31
31
  natural_pdf/describe/__init__.py,sha256=B3zjuHjFI_dFuBLgXR1Q4v7c72fVDyk84d2hs0H4KV8,561
32
32
  natural_pdf/describe/base.py,sha256=mUvEydumXXPJ2FkWAYm1BbWrRWY81I0dMyQrEU32rmc,17256
33
33
  natural_pdf/describe/elements.py,sha256=xD8wwR1z5IKat7RIwoAwQRUEL6zJTEwcOKorF4F-xPg,12717
34
34
  natural_pdf/describe/mixin.py,sha256=U0x6v8r57KQb8qC3VVo64hvhfXQWsti8vdKBM7AXnMo,3116
35
- natural_pdf/describe/summary.py,sha256=h5zy9zG7t27wFnJ2hEguGSoURtN2IR4x6WBO3aXB4eo,7980
35
+ natural_pdf/describe/summary.py,sha256=7FIF3zF6bzNx-gx4pCJr2XQFKiVzOEDnWsAYQ_mr9L0,7982
36
36
  natural_pdf/elements/__init__.py,sha256=S8XeiNWJ1WcgnyYKdYV1yxQlAxCCO3FfITT8MQwNbyk,41
37
37
  natural_pdf/elements/base.py,sha256=iw-Ab0o7eI69npt0gAxQvA14GPWHAAhkLrJ_JeKvIos,43309
38
38
  natural_pdf/elements/collections.py,sha256=JrM42VPRtDOJ9Q9KIR3SrcbamiiCHXI4nzTq2BBkeEk,124223
39
- natural_pdf/elements/line.py,sha256=300kSFBDUBIudfeQtH_tzW9gTYRgRKUDPiTABw6J-BE,4782
39
+ natural_pdf/elements/line.py,sha256=aQm4pDdlQSDAAXqrdg4AU-oTl9JCXgYuaJN0EYls6E0,4920
40
40
  natural_pdf/elements/rect.py,sha256=kiVa3e377ZnqIOXc89d9ZSY4EcmDxtccdtUw-HOQzpw,3796
41
- natural_pdf/elements/region.py,sha256=CVncbiCk8ivn04CI7Ob93O7UY0ANVpCJwikBt-jVWgg,123698
42
- natural_pdf/elements/text.py,sha256=yshGrvdiBZSkYhQfdi6Yz6NN0kWvmqKHSSC82D829os,11470
41
+ natural_pdf/elements/region.py,sha256=63rdyjOnbmsgTN1WMSOyQWQnvJRUYco9qTWLqBi3TBk,125498
42
+ natural_pdf/elements/text.py,sha256=x163dnr2ZDEIE_WZXWH5hXJtoO-6cvTdA2BABcZd69U,14575
43
43
  natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
44
44
  natural_pdf/exporters/__init__.py,sha256=g1WRPCDVzceaUUsm8dchPhzdHFSjYM0NfRyc8iN0mtE,644
45
45
  natural_pdf/exporters/base.py,sha256=XhR1xlkHOh7suOuX7mWbsj1h2o1pZNet-OAS5YCJyeI,2115
@@ -70,7 +70,7 @@ natural_pdf/ocr/ocr_manager.py,sha256=K2gpFo3e6RB1ouXOstlEAAYd14DbjBNt5RH6J7ZdDQ
70
70
  natural_pdf/ocr/ocr_options.py,sha256=l33QKu_93r-uwi3t_v8UH8pEgHo6HTVzP4tfmQFRF1w,5488
71
71
  natural_pdf/ocr/utils.py,sha256=OxuHwDbHWj6setvnC0QYwMHrAjxGkhmLzWHpMqqGupA,4397
72
72
  natural_pdf/qa/__init__.py,sha256=2u2KJcA71g1I0HnLD-j6yvDw1moAjo9kkLhhfoYRURM,166
73
- natural_pdf/qa/document_qa.py,sha256=6-XuIEFf5BcVA_e85FBmAeXpNZgzZhTBDkNUMPAl-tc,17803
73
+ natural_pdf/qa/document_qa.py,sha256=cli1E9NBSVtT5Qo6n7ZRd7BpstnbpZfkljX69LGTYU8,19608
74
74
  natural_pdf/qa/qa_result.py,sha256=_q4dlSqsjtgomcI8-pqbOT69lqQKnEMkhZNydoxEkkE,2227
75
75
  natural_pdf/search/__init__.py,sha256=0Xa7tT_2q57wHObFMQLQLd4gd9AV0oyS-svV6BmmdMI,4276
76
76
  natural_pdf/search/lancedb_search_service.py,sha256=6dz2IEZUWk3hFW28C-LF_85pWohd7Sr5k44bM0pBdm4,14472
@@ -86,15 +86,15 @@ natural_pdf/utils/debug.py,sha256=RN7H3E6ph-GtxubCW6psW7TO8o2BxcNLiEzByTVR9fk,99
86
86
  natural_pdf/utils/highlighting.py,sha256=EIY6ihVGtUTS_DjWyxpnr_UXpcR4btC1KhSGQ9VUfKg,698
87
87
  natural_pdf/utils/identifiers.py,sha256=P7n6owcubnF8oAMa_UfYtENmIaJQdH_AMC9Jbs2bWXo,1117
88
88
  natural_pdf/utils/locks.py,sha256=7HJqV0VsNcOfISnbw8goCKWP5ck11uSJo6T_x9XIPKI,215
89
- natural_pdf/utils/packaging.py,sha256=Jshxp6S1zfcqoZmFhdd7WOpL--b6rBSz-Y9mYqELXIY,21581
89
+ natural_pdf/utils/packaging.py,sha256=e7U2wWvpunlAWpPFexNkD_c4dYbPp5LcKo7og4bNGvk,22411
90
90
  natural_pdf/utils/reading_order.py,sha256=s3DsYq_3g_1YA07qhd4BGEjeIRTeyGtnwc_hNtSzwBY,7290
91
91
  natural_pdf/utils/text_extraction.py,sha256=mDeN1_VevNi3RwvFe48PM5vBh-A5WeBlYgP6lSjBaOk,10854
92
92
  natural_pdf/utils/visualization.py,sha256=30pRWQdsRJh2pSObh-brKVsFgC1n8tHmSrta_UDnVPw,8989
93
93
  natural_pdf/widgets/__init__.py,sha256=QTVaUmsw__FCweFYZebwPssQxxUFUMd0wpm_cUbGZJY,181
94
94
  natural_pdf/widgets/viewer.py,sha256=2VUY1TzWMDe9I-IVNOosKZ2LaqpjLB62ftMAdk-s6_8,24952
95
- natural_pdf-0.1.24.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
96
- natural_pdf-0.1.24.dist-info/METADATA,sha256=qcyQUXKXciLsomzdsdkQ4inSw_MJbczyj8oPq4KVGZQ,6684
97
- natural_pdf-0.1.24.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
98
- natural_pdf-0.1.24.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
99
- natural_pdf-0.1.24.dist-info/top_level.txt,sha256=Cyw1zmNDlUZfb5moU-WUWGprrwH7ln_8LDGdmMHF1xI,17
100
- natural_pdf-0.1.24.dist-info/RECORD,,
95
+ natural_pdf-0.1.26.dev0.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
96
+ natural_pdf-0.1.26.dev0.dist-info/METADATA,sha256=Y0nVAEzmtTldA3i9iY2Gtn-WO4FzjhmC1Le6M-WJ_Ko,6689
97
+ natural_pdf-0.1.26.dev0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
98
+ natural_pdf-0.1.26.dev0.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
99
+ natural_pdf-0.1.26.dev0.dist-info/top_level.txt,sha256=Cyw1zmNDlUZfb5moU-WUWGprrwH7ln_8LDGdmMHF1xI,17
100
+ natural_pdf-0.1.26.dev0.dist-info/RECORD,,