natural-pdf 0.1.30__py3-none-any.whl → 0.1.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1319,6 +1319,28 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
1319
1319
  table_settings.setdefault("vertical_strategy", "lines")
1320
1320
  table_settings.setdefault("horizontal_strategy", "lines")
1321
1321
 
1322
+ # -------------------------------------------------------------
1323
+ # Auto-inject tolerances when text-based strategies are requested.
1324
+ # This must happen AFTER alias handling (so strategies are final)
1325
+ # and BEFORE we delegate to _extract_table_* helpers.
1326
+ # -------------------------------------------------------------
1327
+ if "text" in (table_settings.get("vertical_strategy"), table_settings.get("horizontal_strategy")):
1328
+ page_cfg = getattr(self.page, "_config", {})
1329
+ # Ensure text_* tolerances are passed to pdfplumber
1330
+ if "text_x_tolerance" not in table_settings and "x_tolerance" not in table_settings:
1331
+ if page_cfg.get("x_tolerance") is not None:
1332
+ table_settings["text_x_tolerance"] = page_cfg["x_tolerance"]
1333
+ if "text_y_tolerance" not in table_settings and "y_tolerance" not in table_settings:
1334
+ if page_cfg.get("y_tolerance") is not None:
1335
+ table_settings["text_y_tolerance"] = page_cfg["y_tolerance"]
1336
+
1337
+ # Snap / join tolerances (~ line spacing)
1338
+ if "snap_tolerance" not in table_settings and "snap_x_tolerance" not in table_settings:
1339
+ snap = max(1, round((page_cfg.get("y_tolerance", 1)) * 0.9))
1340
+ table_settings["snap_tolerance"] = snap
1341
+ if "join_tolerance" not in table_settings and "join_x_tolerance" not in table_settings:
1342
+ table_settings["join_tolerance"] = table_settings["snap_tolerance"]
1343
+
1322
1344
  logger.debug(f"Region {self.bbox}: Extracting table using method '{effective_method}'")
1323
1345
 
1324
1346
  # Use the selected method
@@ -1438,6 +1460,30 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
1438
1460
  Returns:
1439
1461
  List of tables, where each table is a list of rows, and each row is a list of cell values
1440
1462
  """
1463
+ # Inject global PDF-level text tolerances if not explicitly present
1464
+ pdf_cfg = getattr(self.page, "_config", getattr(self.page._parent, "_config", {}))
1465
+ _uses_text = "text" in (
1466
+ table_settings.get("vertical_strategy"),
1467
+ table_settings.get("horizontal_strategy"),
1468
+ )
1469
+ if _uses_text and "text_x_tolerance" not in table_settings and "x_tolerance" not in table_settings:
1470
+ x_tol = pdf_cfg.get("x_tolerance")
1471
+ if x_tol is not None:
1472
+ table_settings.setdefault("text_x_tolerance", x_tol)
1473
+ if _uses_text and "text_y_tolerance" not in table_settings and "y_tolerance" not in table_settings:
1474
+ y_tol = pdf_cfg.get("y_tolerance")
1475
+ if y_tol is not None:
1476
+ table_settings.setdefault("text_y_tolerance", y_tol)
1477
+
1478
+ if _uses_text and "snap_tolerance" not in table_settings and "snap_x_tolerance" not in table_settings:
1479
+ snap = max(1, round((pdf_cfg.get("y_tolerance", 1)) * 0.9))
1480
+ table_settings.setdefault("snap_tolerance", snap)
1481
+ if _uses_text and "join_tolerance" not in table_settings and "join_x_tolerance" not in table_settings:
1482
+ join = table_settings.get("snap_tolerance", 1)
1483
+ table_settings.setdefault("join_tolerance", join)
1484
+ table_settings.setdefault("join_x_tolerance", join)
1485
+ table_settings.setdefault("join_y_tolerance", join)
1486
+
1441
1487
  # Create a crop of the page for this region
1442
1488
  cropped = self.page._page.crop(self.bbox)
1443
1489
 
@@ -1458,6 +1504,21 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
1458
1504
  Returns:
1459
1505
  Table data as a list of rows, where each row is a list of cell values
1460
1506
  """
1507
+ # Inject global PDF-level text tolerances if not explicitly present
1508
+ pdf_cfg = getattr(self.page, "_config", getattr(self.page._parent, "_config", {}))
1509
+ _uses_text = "text" in (
1510
+ table_settings.get("vertical_strategy"),
1511
+ table_settings.get("horizontal_strategy"),
1512
+ )
1513
+ if _uses_text and "text_x_tolerance" not in table_settings and "x_tolerance" not in table_settings:
1514
+ x_tol = pdf_cfg.get("x_tolerance")
1515
+ if x_tol is not None:
1516
+ table_settings.setdefault("text_x_tolerance", x_tol)
1517
+ if _uses_text and "text_y_tolerance" not in table_settings and "y_tolerance" not in table_settings:
1518
+ y_tol = pdf_cfg.get("y_tolerance")
1519
+ if y_tol is not None:
1520
+ table_settings.setdefault("text_y_tolerance", y_tol)
1521
+
1461
1522
  # Create a crop of the page for this region
1462
1523
  cropped = self.page._page.crop(self.bbox)
1463
1524
 
@@ -1943,21 +2004,45 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
1943
2004
  """
1944
2005
  Apply OCR to this region and return the created text elements.
1945
2006
 
2007
+ This method supports two modes:
2008
+ 1. **Built-in OCR Engines** (default) – identical to previous behaviour. Pass typical
2009
+ parameters like ``engine='easyocr'`` or ``languages=['en']`` and the method will
2010
+ route the request through :class:`OCRManager`.
2011
+ 2. **Custom OCR Function** – pass a *callable* under the keyword ``function`` (or
2012
+ ``ocr_function``). The callable will receive *this* Region instance and should
2013
+ return the extracted text (``str``) or ``None``. Internally the call is
2014
+ delegated to :meth:`apply_custom_ocr` so the same logic (replacement, element
2015
+ creation, etc.) is re-used.
2016
+
2017
+ Examples
2018
+ ---------
2019
+ >>> def llm_ocr(region):
2020
+ ... image = region.to_image(resolution=300, crop=True)
2021
+ ... return my_llm_client.ocr(image)
2022
+ >>> region.apply_ocr(function=llm_ocr)
2023
+
1946
2024
  Args:
1947
- replace: If True (default), removes existing OCR elements in the region
1948
- before adding new ones. If False, adds new OCR elements without
1949
- removing existing ones.
1950
- **ocr_params: Keyword arguments passed to the OCR Manager.
1951
- Common parameters like `engine`, `languages`, `min_confidence`,
1952
- `device`, and `resolution` (for image rendering) should be
1953
- provided here. **The `languages` list must contain codes
1954
- understood by the specific engine selected.** No mapping
1955
- is performed. Engine-specific settings can be passed in
1956
- an `options` object (e.g., `options=EasyOCROptions(...)`).
2025
+ replace: Whether to remove existing OCR elements first (default ``True``).
2026
+ **ocr_params: Parameters for the built-in OCR manager *or* the special
2027
+ ``function``/``ocr_function`` keyword to trigger custom mode.
2028
+
2029
+ Returns
2030
+ -------
2031
+ Self for chaining.
2032
+ """
2033
+ # --- Custom OCR function path --------------------------------------------------
2034
+ custom_func = ocr_params.pop("function", None) or ocr_params.pop("ocr_function", None)
2035
+ if callable(custom_func):
2036
+ # Delegate to the specialised helper while preserving key kwargs
2037
+ return self.apply_custom_ocr(
2038
+ ocr_function=custom_func,
2039
+ source_label=ocr_params.pop("source_label", "custom-ocr"),
2040
+ replace=replace,
2041
+ confidence=ocr_params.pop("confidence", None),
2042
+ add_to_page=ocr_params.pop("add_to_page", True),
2043
+ )
1957
2044
 
1958
- Returns:
1959
- Self for method chaining.
1960
- """
2045
+ # --- Original built-in OCR engine path (unchanged except docstring) ------------
1961
2046
  # Ensure OCRManager is available
1962
2047
  if not hasattr(self.page._parent, "_ocr_manager") or self.page._parent._ocr_manager is None:
1963
2048
  logger.error("OCRManager not available on parent PDF. Cannot apply OCR to region.")
@@ -2123,6 +2208,133 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
2123
2208
  logger.info(f"Region {self.bbox}: Added {len(created_elements)} elements from OCR.")
2124
2209
  return self
2125
2210
 
2211
+ def apply_custom_ocr(
2212
+ self,
2213
+ ocr_function: Callable[["Region"], Optional[str]],
2214
+ source_label: str = "custom-ocr",
2215
+ replace: bool = True,
2216
+ confidence: Optional[float] = None,
2217
+ add_to_page: bool = True,
2218
+ ) -> "Region":
2219
+ """
2220
+ Apply a custom OCR function to this region and create text elements from the results.
2221
+
2222
+ This is useful when you want to use a custom OCR method (e.g., an LLM API,
2223
+ specialized OCR service, or any custom logic) instead of the built-in OCR engines.
2224
+
2225
+ Args:
2226
+ ocr_function: A callable that takes a Region and returns the OCR'd text (or None).
2227
+ The function receives this region as its argument and should return
2228
+ the extracted text as a string, or None if no text was found.
2229
+ source_label: Label to identify the source of these text elements (default: "custom-ocr").
2230
+ This will be set as the 'source' attribute on created elements.
2231
+ replace: If True (default), removes existing OCR elements in this region before
2232
+ adding new ones. If False, adds new OCR elements alongside existing ones.
2233
+ confidence: Optional confidence score for the OCR result (0.0-1.0).
2234
+ If None, defaults to 1.0 if text is returned, 0.0 if None is returned.
2235
+ add_to_page: If True (default), adds the created text element to the page.
2236
+ If False, creates the element but doesn't add it to the page.
2237
+
2238
+ Returns:
2239
+ Self for method chaining.
2240
+
2241
+ Example:
2242
+ # Using with an LLM
2243
+ def ocr_with_llm(region):
2244
+ image = region.to_image(resolution=300, crop=True)
2245
+ # Call your LLM API here
2246
+ return llm_client.ocr(image)
2247
+
2248
+ region.apply_custom_ocr(ocr_with_llm)
2249
+
2250
+ # Using with a custom OCR service
2251
+ def ocr_with_service(region):
2252
+ img_bytes = region.to_image(crop=True).tobytes()
2253
+ response = ocr_service.process(img_bytes)
2254
+ return response.text
2255
+
2256
+ region.apply_custom_ocr(ocr_with_service, source_label="my-ocr-service")
2257
+ """
2258
+ # If replace is True, remove existing OCR elements in this region
2259
+ if replace:
2260
+ logger.info(
2261
+ f"Region {self.bbox}: Removing existing OCR elements before applying custom OCR."
2262
+ )
2263
+
2264
+ removed_count = 0
2265
+
2266
+ # Helper to remove a single element safely
2267
+ def _safe_remove(elem):
2268
+ nonlocal removed_count
2269
+ success = False
2270
+ if hasattr(elem, "page") and hasattr(elem.page, "_element_mgr"):
2271
+ etype = getattr(elem, "object_type", "word")
2272
+ if etype == "word":
2273
+ etype_key = "words"
2274
+ elif etype == "char":
2275
+ etype_key = "chars"
2276
+ else:
2277
+ etype_key = etype + "s" if not etype.endswith("s") else etype
2278
+ try:
2279
+ success = elem.page._element_mgr.remove_element(elem, etype_key)
2280
+ except Exception:
2281
+ success = False
2282
+ if success:
2283
+ removed_count += 1
2284
+
2285
+ # Remove OCR elements overlapping this region
2286
+ for word in list(self.page._element_mgr.words):
2287
+ if getattr(word, "source", "").startswith("ocr") and self.intersects(word):
2288
+ _safe_remove(word)
2289
+
2290
+ # Also remove elements whose source equals the given source_label
2291
+ for word in list(self.page._element_mgr.words):
2292
+ if getattr(word, "source", "") == source_label and self.intersects(word):
2293
+ _safe_remove(word)
2294
+
2295
+ if removed_count > 0:
2296
+ logger.info(
2297
+ f"Region {self.bbox}: Removed {removed_count} existing OCR elements."
2298
+ )
2299
+
2300
+ # Call the custom OCR function
2301
+ try:
2302
+ logger.debug(f"Region {self.bbox}: Calling custom OCR function...")
2303
+ ocr_text = ocr_function(self)
2304
+
2305
+ if ocr_text is not None and not isinstance(ocr_text, str):
2306
+ logger.warning(
2307
+ f"Custom OCR function returned non-string type ({type(ocr_text)}). "
2308
+ f"Converting to string."
2309
+ )
2310
+ ocr_text = str(ocr_text)
2311
+
2312
+ except Exception as e:
2313
+ logger.error(
2314
+ f"Error calling custom OCR function for region {self.bbox}: {e}",
2315
+ exc_info=True
2316
+ )
2317
+ return self
2318
+
2319
+ # Create text element if we got text
2320
+ if ocr_text is not None:
2321
+ # Use the to_text_element method to create the element
2322
+ text_element = self.to_text_element(
2323
+ text_content=ocr_text,
2324
+ source_label=source_label,
2325
+ confidence=confidence,
2326
+ add_to_page=add_to_page
2327
+ )
2328
+
2329
+ logger.info(
2330
+ f"Region {self.bbox}: Created text element with {len(ocr_text)} chars"
2331
+ f"{' and added to page' if add_to_page else ''}"
2332
+ )
2333
+ else:
2334
+ logger.debug(f"Region {self.bbox}: Custom OCR function returned None (no text found)")
2335
+
2336
+ return self
2337
+
2126
2338
  def get_section_between(self, start_element=None, end_element=None, boundary_inclusion="both"):
2127
2339
  """
2128
2340
  Get a section between two elements within this region.
@@ -2917,6 +3129,33 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
2917
3129
  if not hasattr(self, "page") or self.page is None:
2918
3130
  raise ValueError("Region must have a valid 'page' attribute to create a TextElement.")
2919
3131
 
3132
+ # Create character dictionaries for the text
3133
+ char_dicts = []
3134
+ if actual_text:
3135
+ # Create a single character dict that spans the entire region
3136
+ # This is a simplified approach - OCR engines typically create one per character
3137
+ char_dict = {
3138
+ "text": actual_text,
3139
+ "x0": self.x0,
3140
+ "top": self.top,
3141
+ "x1": self.x1,
3142
+ "bottom": self.bottom,
3143
+ "width": self.width,
3144
+ "height": self.height,
3145
+ "object_type": "char",
3146
+ "page_number": self.page.page_number,
3147
+ "fontname": default_font_name,
3148
+ "size": default_font_size,
3149
+ "upright": True,
3150
+ "direction": 1,
3151
+ "adv": self.width,
3152
+ "source": source_label,
3153
+ "confidence": final_confidence,
3154
+ "stroking_color": (0, 0, 0),
3155
+ "non_stroking_color": (0, 0, 0),
3156
+ }
3157
+ char_dicts.append(char_dict)
3158
+
2920
3159
  elem_data = {
2921
3160
  "text": actual_text,
2922
3161
  "x0": self.x0,
@@ -2936,7 +3175,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
2936
3175
  "adv": self.width,
2937
3176
  "source": source_label,
2938
3177
  "confidence": final_confidence,
2939
- "_char_dicts": [],
3178
+ "_char_dicts": char_dicts,
2940
3179
  }
2941
3180
  text_element = TextElement(elem_data, self.page)
2942
3181
 
@@ -2952,6 +3191,10 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
2952
3191
  logger.debug(
2953
3192
  f"TextElement created from region {self.bbox} and added to page {self.page.page_number} as {add_as_type}."
2954
3193
  )
3194
+ # Also add character dictionaries to the chars collection
3195
+ if char_dicts and object_type == "word":
3196
+ for char_dict in char_dicts:
3197
+ self.page._element_mgr.add_element(char_dict, element_type="chars")
2955
3198
  else:
2956
3199
  page_num_str = (
2957
3200
  str(self.page.page_number) if hasattr(self.page, "page_number") else "N/A"
@@ -468,3 +468,32 @@ class TextElement(Element):
468
468
  info[f"raw_{prop}"] = self._obj[prop]
469
469
 
470
470
  return info
471
+
472
+ @property
473
+ def visual_text(self) -> str:
474
+ """Return the text converted to *visual* order using the Unicode BiDi algorithm.
475
+
476
+ This helper is intentionally side-effect–free: it does **not** mutate
477
+ ``self.text`` or the underlying character dictionaries. It should be
478
+ used by UI / rendering code that needs human-readable RTL/LTR mixing.
479
+ """
480
+ logical = self.text
481
+ if not logical:
482
+ return logical
483
+
484
+ # Quick check – bail out if no RTL chars to save import/CPU.
485
+ import unicodedata
486
+
487
+ if not any(unicodedata.bidirectional(ch) in ("R", "AL", "AN") for ch in logical):
488
+ return logical
489
+
490
+ try:
491
+ from bidi.algorithm import get_display # type: ignore
492
+ from natural_pdf.utils.bidi_mirror import mirror_brackets
493
+
494
+ # Convert from logical order to visual order
495
+ visual = get_display(logical, base_dir="R")
496
+ return mirror_brackets(visual)
497
+ except Exception:
498
+ # If python-bidi is missing or errors, fall back to logical order
499
+ return logical
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.1.30
3
+ Version: 0.1.32
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -1,11 +1,8 @@
1
- bad_pdf_analysis/analyze_10_more.py,sha256=UjsTuHE1GUMoVjkX3afy3x6DfpXyfZXHgS2W1GQqUmw,11906
2
- bad_pdf_analysis/analyze_final_10.py,sha256=xYkIId0nF9LpWHRLDP1_nlJfJfC0b0Tu4mLu-3mim-0,25170
3
- bad_pdf_analysis/analyze_specific_pages.py,sha256=wzq3_ZWR28hFdT7GEkayHPYgsk20OpD476LYmy2rAEk,13725
4
- bad_pdf_analysis/analyze_specific_pages_direct.py,sha256=307gSNplwOtNTR9a0lEQWxlAKGeoZIcDe5z1pROKUXY,14846
5
1
  natural_pdf/__init__.py,sha256=qDFJNF8sbEDO-2WSFAxoWEM8updOUP6dB-ckya0kxfs,3275
6
2
  natural_pdf/cli.py,sha256=IXrP2lCHihr-ed-CFiDbMTnSsutQa1j1PYALOLGbpsc,4019
7
- natural_pdf/analyzers/__init__.py,sha256=dIXjsMqoxKmd9OOnSBzn12wvdIz7D7YNQRAnXslpJSM,142
8
- natural_pdf/analyzers/shape_detection_mixin.py,sha256=0a4uuKQ4Z1Ta_UVuUtX7mVhlwmXdAkoHTyC5wZyp5do,94455
3
+ natural_pdf/analyzers/__init__.py,sha256=MQRctn4i5Q7u8pb8vQVHKEXUiVGpKyPZUECrlDH4AuU,673
4
+ natural_pdf/analyzers/guides.py,sha256=tzyViSBDdM66mT0niwFTDIJ16UzRCZ18Iqv8wA5DYAk,90302
5
+ natural_pdf/analyzers/shape_detection_mixin.py,sha256=q7gDM-z2t7bSTxjfV2aaW3533CySu1qsEpu4wb5Rp-I,62688
9
6
  natural_pdf/analyzers/text_options.py,sha256=qEkDaYWla0rIM_gszEOsu52q7C_dAfV81P2HLJZM2sw,3333
10
7
  natural_pdf/analyzers/text_structure.py,sha256=VfKTsTFrK877sC0grsis9jK3rrgp0Mbp13VWEbukTcs,28437
11
8
  natural_pdf/analyzers/utils.py,sha256=PYbzJzSAHZ7JsMes84WIrSbA0zkjJGs0CLvIeINsf_k,2100
@@ -28,10 +25,10 @@ natural_pdf/classification/results.py,sha256=Mcay-xLBHbYoZ8U7f4gMj2IhhH_yORNEkZH
28
25
  natural_pdf/collections/mixins.py,sha256=sj76Cn6EdBtb5f-bdAV-1qpdixX8tI4BzPccPiYLI1w,5117
29
26
  natural_pdf/collections/pdf_collection.py,sha256=HLlyakM--23ZOeHDPucoM6Tw3yUyMXm0SSoqJwxRc2E,30744
30
27
  natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
31
- natural_pdf/core/element_manager.py,sha256=96v_w3kXhSUqRsJlX5Bl6O6hJzpYRqDn4xoyRsdqZ7o,49260
28
+ natural_pdf/core/element_manager.py,sha256=A6GJk9kwTzt-aSz4-SWaRHLZRbIMFFLce3CpxSyfkV4,51749
32
29
  natural_pdf/core/highlighting_service.py,sha256=WKDqRpex1yS8CWhkNitWtKhxbyRRCLu3Xsct_HTPsD4,40774
33
- natural_pdf/core/page.py,sha256=kQKKqsbOaNeLhW3ark6mueDS-4tsopJcGcoMmKPK6B8,125624
34
- natural_pdf/core/pdf.py,sha256=YfniZp54AyptzMyr7ZP8n617n4wlV28SPrajt32nNBk,74233
30
+ natural_pdf/core/page.py,sha256=843_Fyk1gxZ8nqERJjjjoRD3iM4pFJy9a0zQSyMthiQ,128476
31
+ natural_pdf/core/pdf.py,sha256=mC4GZjPXx_bK6RUlhLpnJnapkHDhbgJpgpcUJOvb7OE,75290
35
32
  natural_pdf/describe/__init__.py,sha256=B3zjuHjFI_dFuBLgXR1Q4v7c72fVDyk84d2hs0H4KV8,561
36
33
  natural_pdf/describe/base.py,sha256=HaWlHltb-dw6ug4mfR_iBLHWxr1OdPwLaUshXRxO7gg,18462
37
34
  natural_pdf/describe/elements.py,sha256=COvKF3B_RbAxXl5ORJDubV4C5PsiuSfuzD0ufPIJTFM,12983
@@ -39,12 +36,12 @@ natural_pdf/describe/mixin.py,sha256=U0x6v8r57KQb8qC3VVo64hvhfXQWsti8vdKBM7AXnMo
39
36
  natural_pdf/describe/summary.py,sha256=7FIF3zF6bzNx-gx4pCJr2XQFKiVzOEDnWsAYQ_mr9L0,7982
40
37
  natural_pdf/elements/__init__.py,sha256=S8XeiNWJ1WcgnyYKdYV1yxQlAxCCO3FfITT8MQwNbyk,41
41
38
  natural_pdf/elements/base.py,sha256=VshU4RstdzONJFq_8UVIjT_lVOai0MwMFsSFrCN-IO8,47299
42
- natural_pdf/elements/collections.py,sha256=52Oac96svzm_QMJcVaItnCG9P7d6JMNiGEx9lHgDEQg,125915
39
+ natural_pdf/elements/collections.py,sha256=1E2MSg2NNcEcoRM2rumrv_CqIdO7DgbRHYEtfw35FaQ,128457
43
40
  natural_pdf/elements/image.py,sha256=UjHNzCgDzOseQmLpkKshcxg51DPmWNIAVYxZ0TAMyUI,1423
44
41
  natural_pdf/elements/line.py,sha256=aQm4pDdlQSDAAXqrdg4AU-oTl9JCXgYuaJN0EYls6E0,4920
45
42
  natural_pdf/elements/rect.py,sha256=kiVa3e377ZnqIOXc89d9ZSY4EcmDxtccdtUw-HOQzpw,3796
46
- natural_pdf/elements/region.py,sha256=v1PzWvQoGHGdn7SQiPf4Oq3hIGueIfYGwcZ05ZU6XPE,127692
47
- natural_pdf/elements/text.py,sha256=2neapKplef0FsAMYWr4OdICt-TmrZ3z9z0YBrX8FrSk,17738
43
+ natural_pdf/elements/region.py,sha256=8SKhzCJ6sELZxJcM2i_58YhEKU6HBvaJ7Oj6E3bOsHw,139523
44
+ natural_pdf/elements/text.py,sha256=kw7u2KfHtDB905YawP7Hs89kcR8XnbtpkYQGEk6LNyk,18860
48
45
  natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
49
46
  natural_pdf/exporters/__init__.py,sha256=g1WRPCDVzceaUUsm8dchPhzdHFSjYM0NfRyc8iN0mtE,644
50
47
  natural_pdf/exporters/base.py,sha256=XhR1xlkHOh7suOuX7mWbsj1h2o1pZNet-OAS5YCJyeI,2115
@@ -100,13 +97,12 @@ natural_pdf/utils/text_extraction.py,sha256=mDeN1_VevNi3RwvFe48PM5vBh-A5WeBlYgP6
100
97
  natural_pdf/utils/visualization.py,sha256=n3IZpbY5cf9LItzGavBcNyVZZrrUVxjYnmqZHYPa7NU,9386
101
98
  natural_pdf/widgets/__init__.py,sha256=QTVaUmsw__FCweFYZebwPssQxxUFUMd0wpm_cUbGZJY,181
102
99
  natural_pdf/widgets/viewer.py,sha256=2VUY1TzWMDe9I-IVNOosKZ2LaqpjLB62ftMAdk-s6_8,24952
103
- natural_pdf-0.1.30.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
100
+ natural_pdf-0.1.32.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
104
101
  optimization/memory_comparison.py,sha256=XEHtjduSmzXzxnsJMvemTcq-OAlvGUBAm5wwnOXq8TY,6524
105
102
  optimization/pdf_analyzer.py,sha256=G3XWhsEqIYbohEgTqz6wzxkAnOx4MkbvbSspx577-8w,19145
106
103
  optimization/performance_analysis.py,sha256=vVlFDywEXxhJLd9n2KVVqqQnS6rwWoHV_jlogboGF2k,13784
107
104
  optimization/test_cleanup_methods.py,sha256=B_zHiJr1hI8q-tdfBoFi0Jf5lj2PURjA_6teRBGoz8o,6277
108
105
  optimization/test_memory_fix.py,sha256=CWc0OSvFfKE0-nxqJOi_HAQc0GXUPKzkQbTeJp5UqxU,6364
109
- tools/rtl_smoke_test.py,sha256=-ogcbvNzumJasICP0NNQHk4Zb4M1VRx0TnGkJUQC7SM,3043
110
106
  tools/bad_pdf_eval/__init__.py,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
111
107
  tools/bad_pdf_eval/analyser.py,sha256=sR31aVVmTXRHS8uwLZXlPefTH2_lskxtAzuZwlhsyOo,13391
112
108
  tools/bad_pdf_eval/collate_summaries.py,sha256=Mcmf1OvVn0S0efj5ypk0syXKSrfUf6L5dowoGvOTgjU,5047
@@ -115,8 +111,8 @@ tools/bad_pdf_eval/export_enrichment_csv.py,sha256=SMEm9WxFUN_RIf8AGfZfjGEmvBvrO
115
111
  tools/bad_pdf_eval/llm_enrich.py,sha256=PsFMymPc8BNck21T3vupTN18pLdum-A_OLoJEKr6f80,12234
116
112
  tools/bad_pdf_eval/reporter.py,sha256=LIhcguDZ5XKgb0WeJsyA7m0kcliebOohzveShvt_KmY,400
117
113
  tools/bad_pdf_eval/utils.py,sha256=FuxaPX6f26IjQXu1vP0a2i9h1jgJNbASb8mRyj5-elE,4849
118
- natural_pdf-0.1.30.dist-info/METADATA,sha256=4Jg-iXXt6zGNE4gSYE_nMF395JDzv1Dierh93x1Lklo,6711
119
- natural_pdf-0.1.30.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
120
- natural_pdf-0.1.30.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
121
- natural_pdf-0.1.30.dist-info/top_level.txt,sha256=oZlRzSc3nZ9sV3L6kD_Di734Pp62ANrm46imFVa51qQ,58
122
- natural_pdf-0.1.30.dist-info/RECORD,,
114
+ natural_pdf-0.1.32.dist-info/METADATA,sha256=CMZIo2BjeLh-b9hezQHMLehZP8brUflCQ69dLtfFyxo,6711
115
+ natural_pdf-0.1.32.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
116
+ natural_pdf-0.1.32.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
117
+ natural_pdf-0.1.32.dist-info/top_level.txt,sha256=oZlRzSc3nZ9sV3L6kD_Di734Pp62ANrm46imFVa51qQ,58
118
+ natural_pdf-0.1.32.dist-info/RECORD,,