natural-pdf 0.1.31__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1319,6 +1319,28 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
1319
1319
  table_settings.setdefault("vertical_strategy", "lines")
1320
1320
  table_settings.setdefault("horizontal_strategy", "lines")
1321
1321
 
1322
+ # -------------------------------------------------------------
1323
+ # Auto-inject tolerances when text-based strategies are requested.
1324
+ # This must happen AFTER alias handling (so strategies are final)
1325
+ # and BEFORE we delegate to _extract_table_* helpers.
1326
+ # -------------------------------------------------------------
1327
+ if "text" in (table_settings.get("vertical_strategy"), table_settings.get("horizontal_strategy")):
1328
+ page_cfg = getattr(self.page, "_config", {})
1329
+ # Ensure text_* tolerances passed to pdfplumber
1330
+ if "text_x_tolerance" not in table_settings and "x_tolerance" not in table_settings:
1331
+ if page_cfg.get("x_tolerance") is not None:
1332
+ table_settings["text_x_tolerance"] = page_cfg["x_tolerance"]
1333
+ if "text_y_tolerance" not in table_settings and "y_tolerance" not in table_settings:
1334
+ if page_cfg.get("y_tolerance") is not None:
1335
+ table_settings["text_y_tolerance"] = page_cfg["y_tolerance"]
1336
+
1337
+ # Snap / join tolerances (~ line spacing)
1338
+ if "snap_tolerance" not in table_settings and "snap_x_tolerance" not in table_settings:
1339
+ snap = max(1, round((page_cfg.get("y_tolerance", 1)) * 0.9))
1340
+ table_settings["snap_tolerance"] = snap
1341
+ if "join_tolerance" not in table_settings and "join_x_tolerance" not in table_settings:
1342
+ table_settings["join_tolerance"] = table_settings["snap_tolerance"]
1343
+
1322
1344
  logger.debug(f"Region {self.bbox}: Extracting table using method '{effective_method}'")
1323
1345
 
1324
1346
  # Use the selected method
@@ -1438,6 +1460,30 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
1438
1460
  Returns:
1439
1461
  List of tables, where each table is a list of rows, and each row is a list of cell values
1440
1462
  """
1463
+ # Inject global PDF-level text tolerances if not explicitly present
1464
+ pdf_cfg = getattr(self.page, "_config", getattr(self.page._parent, "_config", {}))
1465
+ _uses_text = "text" in (
1466
+ table_settings.get("vertical_strategy"),
1467
+ table_settings.get("horizontal_strategy"),
1468
+ )
1469
+ if _uses_text and "text_x_tolerance" not in table_settings and "x_tolerance" not in table_settings:
1470
+ x_tol = pdf_cfg.get("x_tolerance")
1471
+ if x_tol is not None:
1472
+ table_settings.setdefault("text_x_tolerance", x_tol)
1473
+ if _uses_text and "text_y_tolerance" not in table_settings and "y_tolerance" not in table_settings:
1474
+ y_tol = pdf_cfg.get("y_tolerance")
1475
+ if y_tol is not None:
1476
+ table_settings.setdefault("text_y_tolerance", y_tol)
1477
+
1478
+ if _uses_text and "snap_tolerance" not in table_settings and "snap_x_tolerance" not in table_settings:
1479
+ snap = max(1, round((pdf_cfg.get("y_tolerance", 1)) * 0.9))
1480
+ table_settings.setdefault("snap_tolerance", snap)
1481
+ if _uses_text and "join_tolerance" not in table_settings and "join_x_tolerance" not in table_settings:
1482
+ join = table_settings.get("snap_tolerance", 1)
1483
+ table_settings.setdefault("join_tolerance", join)
1484
+ table_settings.setdefault("join_x_tolerance", join)
1485
+ table_settings.setdefault("join_y_tolerance", join)
1486
+
1441
1487
  # Create a crop of the page for this region
1442
1488
  cropped = self.page._page.crop(self.bbox)
1443
1489
 
@@ -1458,6 +1504,21 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
1458
1504
  Returns:
1459
1505
  Table data as a list of rows, where each row is a list of cell values
1460
1506
  """
1507
+ # Inject global PDF-level text tolerances if not explicitly present
1508
+ pdf_cfg = getattr(self.page, "_config", getattr(self.page._parent, "_config", {}))
1509
+ _uses_text = "text" in (
1510
+ table_settings.get("vertical_strategy"),
1511
+ table_settings.get("horizontal_strategy"),
1512
+ )
1513
+ if _uses_text and "text_x_tolerance" not in table_settings and "x_tolerance" not in table_settings:
1514
+ x_tol = pdf_cfg.get("x_tolerance")
1515
+ if x_tol is not None:
1516
+ table_settings.setdefault("text_x_tolerance", x_tol)
1517
+ if _uses_text and "text_y_tolerance" not in table_settings and "y_tolerance" not in table_settings:
1518
+ y_tol = pdf_cfg.get("y_tolerance")
1519
+ if y_tol is not None:
1520
+ table_settings.setdefault("text_y_tolerance", y_tol)
1521
+
1461
1522
  # Create a crop of the page for this region
1462
1523
  cropped = self.page._page.crop(self.bbox)
1463
1524
 
@@ -1943,21 +2004,45 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
1943
2004
  """
1944
2005
  Apply OCR to this region and return the created text elements.
1945
2006
 
2007
+ This method supports two modes:
2008
+ 1. **Built-in OCR Engines** (default) – identical to previous behaviour. Pass typical
2009
+ parameters like ``engine='easyocr'`` or ``languages=['en']`` and the method will
2010
+ route the request through :class:`OCRManager`.
2011
+ 2. **Custom OCR Function** – pass a *callable* under the keyword ``function`` (or
2012
+ ``ocr_function``). The callable will receive *this* Region instance and should
2013
+ return the extracted text (``str``) or ``None``. Internally the call is
2014
+ delegated to :meth:`apply_custom_ocr` so the same logic (replacement, element
2015
+ creation, etc.) is re-used.
2016
+
2017
+ Examples
2018
+ --------
2019
+ >>> def llm_ocr(region):
2020
+ ... image = region.to_image(resolution=300, crop=True)
2021
+ ... return my_llm_client.ocr(image)
2022
+ >>> region.apply_ocr(function=llm_ocr)
2023
+
1946
2024
  Args:
1947
- replace: If True (default), removes existing OCR elements in the region
1948
- before adding new ones. If False, adds new OCR elements without
1949
- removing existing ones.
1950
- **ocr_params: Keyword arguments passed to the OCR Manager.
1951
- Common parameters like `engine`, `languages`, `min_confidence`,
1952
- `device`, and `resolution` (for image rendering) should be
1953
- provided here. **The `languages` list must contain codes
1954
- understood by the specific engine selected.** No mapping
1955
- is performed. Engine-specific settings can be passed in
1956
- an `options` object (e.g., `options=EasyOCROptions(...)`).
2025
+ replace: Whether to remove existing OCR elements first (default ``True``).
2026
+ **ocr_params: Parameters for the built-in OCR manager *or* the special
2027
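+ ``function``/``ocr_function`` keyword to trigger custom mode (custom mode also honours ``source_label``, ``confidence`` and ``add_to_page``; any other keywords are ignored).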
2028
+
2029
+ Returns
2030
+ -------
2031
+ Self for chaining.
2032
+ """
2033
+ # --- Custom OCR function path --------------------------------------------------
2034
+ custom_func = ocr_params.pop("function", None) or ocr_params.pop("ocr_function", None)
2035
+ if callable(custom_func):
2036
+ # Delegate to the specialised helper while preserving key kwargs
2037
+ return self.apply_custom_ocr(
2038
+ ocr_function=custom_func,
2039
+ source_label=ocr_params.pop("source_label", "custom-ocr"),
2040
+ replace=replace,
2041
+ confidence=ocr_params.pop("confidence", None),
2042
+ add_to_page=ocr_params.pop("add_to_page", True),
2043
+ )
1957
2044
 
1958
- Returns:
1959
- Self for method chaining.
1960
- """
2045
+ # --- Original built-in OCR engine path (unchanged except docstring) ------------
1961
2046
  # Ensure OCRManager is available
1962
2047
  if not hasattr(self.page._parent, "_ocr_manager") or self.page._parent._ocr_manager is None:
1963
2048
  logger.error("OCRManager not available on parent PDF. Cannot apply OCR to region.")
@@ -2123,6 +2208,146 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
2123
2208
  logger.info(f"Region {self.bbox}: Added {len(created_elements)} elements from OCR.")
2124
2209
  return self
2125
2210
 
2211
def apply_custom_ocr(
    self,
    ocr_function: Callable[["Region"], Optional[str]],
    source_label: str = "custom-ocr",
    replace: bool = True,
    confidence: Optional[float] = None,
    add_to_page: bool = True,
) -> "Region":
    """
    Apply a custom OCR function to this region and create a text element from the result.

    This is useful when you want to use a custom OCR method (e.g., an LLM API,
    specialized OCR service, or any custom logic) instead of the built-in OCR engines.

    Args:
        ocr_function: A callable that takes a Region and returns the OCR'd text (or None).
            The function receives this region as its argument and should return
            the extracted text as a string, or None if no text was found.
            Non-string return values are converted with ``str()`` (a warning is logged).
        source_label: Label to identify the source of these text elements (default: "custom-ocr").
            It is forwarded to ``to_text_element`` and is also used, together with
            "ocr", to decide which existing elements count as stale when ``replace`` is True.
        replace: If True (default), removes existing OCR elements in this region before
            adding new ones. If False, adds new OCR elements alongside existing ones.
        confidence: Optional confidence score for the OCR result, forwarded unchanged to
            ``to_text_element`` (which picks its own default when this is None).
        add_to_page: If True (default), adds the created text element to the page.
            If False, creates the element but doesn't add it to the page.

    Returns:
        Self for method chaining. Self is also returned (after logging the error)
        when ``ocr_function`` raises.

    Example:
        # Using with an LLM
        def ocr_with_llm(region):
            image = region.to_image(resolution=300, crop=True)
            # Call your LLM API here
            return llm_client.ocr(image)

        region.apply_custom_ocr(ocr_with_llm)

        # Using with a custom OCR service
        def ocr_with_service(region):
            img_bytes = region.to_image(crop=True).tobytes()
            response = ocr_service.process(img_bytes)
            return response.text

        region.apply_custom_ocr(ocr_with_service, source_label="my-ocr-service")
    """
    if replace:
        logger.info(
            f"Region {self.bbox}: Removing existing OCR elements before applying custom OCR."
        )

        region_mgr = self.page._element_mgr
        # Built-in OCR tags elements with "ocr"; earlier custom runs use source_label.
        stale_sources = ("ocr", source_label)
        removed_count = 0

        def _safe_remove(elem, default_type):
            """Best-effort removal of one element; counts successful removals."""
            nonlocal removed_count
            if isinstance(elem, dict):
                # Char dicts (such as those to_text_element adds to the "chars"
                # collection) carry no page reference, so they are removed through
                # this region's own element manager.
                # NOTE(review): assumes remove_element accepts plain dicts the same
                # way add_element does - confirm against ElementManager.
                owner = region_mgr
                etype = elem.get("object_type") or default_type
            else:
                owner = getattr(getattr(elem, "page", None), "_element_mgr", None)
                etype = getattr(elem, "object_type", default_type)
            if owner is None:
                return

            # "word" -> "words", "char" -> "chars"; already-plural names are kept.
            etype_key = etype if etype.endswith("s") else etype + "s"
            try:
                removed = owner.remove_element(elem, etype_key)
            except Exception as exc:
                # Removal stays best-effort, but leave a trace instead of hiding it.
                logger.debug(
                    f"Region {self.bbox}: Could not remove element from '{etype_key}': {exc}"
                )
                removed = False
            if removed:
                removed_count += 1

        # Iterate over copies because removal mutates the underlying collections.
        for word in list(region_mgr.words):
            if getattr(word, "source", "") in stale_sources and self.intersects(word):
                _safe_remove(word, "word")

        for char in list(region_mgr.chars):
            # A char may be a plain dict or a TextElement-like object.
            if isinstance(char, dict):
                char_src = char.get("source")
                cx0, ctop = char.get("x0", 0), char.get("top", 0)
                cx1, cbottom = char.get("x1", 0), char.get("bottom", 0)
            else:
                char_src = getattr(char, "source", None)
                if char_src not in stale_sources:
                    continue
                cx0, ctop, cx1, cbottom = char.x0, char.top, char.x1, char.bottom
            if char_src not in stale_sources:
                continue
            # Quick bounding-box overlap test (touching edges count as overlapping).
            outside = (
                cx1 < self.x0 or cx0 > self.x1 or cbottom < self.top or ctop > self.bottom
            )
            if not outside:
                _safe_remove(char, "char")

        if removed_count > 0:
            logger.info(
                f"Region {self.bbox}: Removed {removed_count} existing OCR elements."
            )

    # Call the custom OCR function.
    # NOTE: when replace is True the old elements are already gone at this point,
    # so a failing ocr_function leaves the region without OCR elements.
    try:
        logger.debug(f"Region {self.bbox}: Calling custom OCR function...")
        ocr_text = ocr_function(self)

        if ocr_text is not None and not isinstance(ocr_text, str):
            logger.warning(
                f"Custom OCR function returned non-string type ({type(ocr_text)}). "
                f"Converting to string."
            )
            ocr_text = str(ocr_text)

    except Exception as e:
        logger.error(
            f"Error calling custom OCR function for region {self.bbox}: {e}",
            exc_info=True
        )
        return self

    if ocr_text is None:
        logger.debug(f"Region {self.bbox}: Custom OCR function returned None (no text found)")
        return self

    # Delegate element creation (and optional page registration) to to_text_element.
    self.to_text_element(
        text_content=ocr_text,
        source_label=source_label,
        confidence=confidence,
        add_to_page=add_to_page
    )

    logger.info(
        f"Region {self.bbox}: Created text element with {len(ocr_text)} chars"
        f"{' and added to page' if add_to_page else ''}"
    )

    return self
2350
+
2126
2351
  def get_section_between(self, start_element=None, end_element=None, boundary_inclusion="both"):
2127
2352
  """
2128
2353
  Get a section between two elements within this region.
@@ -2917,6 +3142,33 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
2917
3142
  if not hasattr(self, "page") or self.page is None:
2918
3143
  raise ValueError("Region must have a valid 'page' attribute to create a TextElement.")
2919
3144
 
3145
+ # Create character dictionaries for the text
3146
+ char_dicts = []
3147
+ if actual_text:
3148
+ # Create a single character dict that spans the entire region
3149
+ # This is a simplified approach - OCR engines typically create one per character
3150
+ char_dict = {
3151
+ "text": actual_text,
3152
+ "x0": self.x0,
3153
+ "top": self.top,
3154
+ "x1": self.x1,
3155
+ "bottom": self.bottom,
3156
+ "width": self.width,
3157
+ "height": self.height,
3158
+ "object_type": "char",
3159
+ "page_number": self.page.page_number,
3160
+ "fontname": default_font_name,
3161
+ "size": default_font_size,
3162
+ "upright": True,
3163
+ "direction": 1,
3164
+ "adv": self.width,
3165
+ "source": source_label,
3166
+ "confidence": final_confidence,
3167
+ "stroking_color": (0, 0, 0),
3168
+ "non_stroking_color": (0, 0, 0),
3169
+ }
3170
+ char_dicts.append(char_dict)
3171
+
2920
3172
  elem_data = {
2921
3173
  "text": actual_text,
2922
3174
  "x0": self.x0,
@@ -2936,7 +3188,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
2936
3188
  "adv": self.width,
2937
3189
  "source": source_label,
2938
3190
  "confidence": final_confidence,
2939
- "_char_dicts": [],
3191
+ "_char_dicts": char_dicts,
2940
3192
  }
2941
3193
  text_element = TextElement(elem_data, self.page)
2942
3194
 
@@ -2952,6 +3204,10 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
2952
3204
  logger.debug(
2953
3205
  f"TextElement created from region {self.bbox} and added to page {self.page.page_number} as {add_as_type}."
2954
3206
  )
3207
+ # Also add character dictionaries to the chars collection
3208
+ if char_dicts and object_type == "word":
3209
+ for char_dict in char_dicts:
3210
+ self.page._element_mgr.add_element(char_dict, element_type="chars")
2955
3211
  else:
2956
3212
  page_num_str = (
2957
3213
  str(self.page.page_number) if hasattr(self.page, "page_number") else "N/A"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.1.31
3
+ Version: 0.1.33
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -1,11 +1,8 @@
1
- bad_pdf_analysis/analyze_10_more.py,sha256=UjsTuHE1GUMoVjkX3afy3x6DfpXyfZXHgS2W1GQqUmw,11906
2
- bad_pdf_analysis/analyze_final_10.py,sha256=xYkIId0nF9LpWHRLDP1_nlJfJfC0b0Tu4mLu-3mim-0,25170
3
- bad_pdf_analysis/analyze_specific_pages.py,sha256=wzq3_ZWR28hFdT7GEkayHPYgsk20OpD476LYmy2rAEk,13725
4
- bad_pdf_analysis/analyze_specific_pages_direct.py,sha256=307gSNplwOtNTR9a0lEQWxlAKGeoZIcDe5z1pROKUXY,14846
5
1
  natural_pdf/__init__.py,sha256=qDFJNF8sbEDO-2WSFAxoWEM8updOUP6dB-ckya0kxfs,3275
6
2
  natural_pdf/cli.py,sha256=IXrP2lCHihr-ed-CFiDbMTnSsutQa1j1PYALOLGbpsc,4019
7
- natural_pdf/analyzers/__init__.py,sha256=dIXjsMqoxKmd9OOnSBzn12wvdIz7D7YNQRAnXslpJSM,142
8
- natural_pdf/analyzers/shape_detection_mixin.py,sha256=0a4uuKQ4Z1Ta_UVuUtX7mVhlwmXdAkoHTyC5wZyp5do,94455
3
+ natural_pdf/analyzers/__init__.py,sha256=MQRctn4i5Q7u8pb8vQVHKEXUiVGpKyPZUECrlDH4AuU,673
4
+ natural_pdf/analyzers/guides.py,sha256=tzyViSBDdM66mT0niwFTDIJ16UzRCZ18Iqv8wA5DYAk,90302
5
+ natural_pdf/analyzers/shape_detection_mixin.py,sha256=q7gDM-z2t7bSTxjfV2aaW3533CySu1qsEpu4wb5Rp-I,62688
9
6
  natural_pdf/analyzers/text_options.py,sha256=qEkDaYWla0rIM_gszEOsu52q7C_dAfV81P2HLJZM2sw,3333
10
7
  natural_pdf/analyzers/text_structure.py,sha256=VfKTsTFrK877sC0grsis9jK3rrgp0Mbp13VWEbukTcs,28437
11
8
  natural_pdf/analyzers/utils.py,sha256=PYbzJzSAHZ7JsMes84WIrSbA0zkjJGs0CLvIeINsf_k,2100
@@ -28,10 +25,10 @@ natural_pdf/classification/results.py,sha256=Mcay-xLBHbYoZ8U7f4gMj2IhhH_yORNEkZH
28
25
  natural_pdf/collections/mixins.py,sha256=sj76Cn6EdBtb5f-bdAV-1qpdixX8tI4BzPccPiYLI1w,5117
29
26
  natural_pdf/collections/pdf_collection.py,sha256=HLlyakM--23ZOeHDPucoM6Tw3yUyMXm0SSoqJwxRc2E,30744
30
27
  natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
31
- natural_pdf/core/element_manager.py,sha256=Mn4cYqPL_2LD_GK9lf2duExaJF1qhASCKsOdAZdQb00,49821
28
+ natural_pdf/core/element_manager.py,sha256=DbRzAKD3to5NpKc73Q-TXZIZkhx8zZtbi_UNu5K7AAU,52766
32
29
  natural_pdf/core/highlighting_service.py,sha256=WKDqRpex1yS8CWhkNitWtKhxbyRRCLu3Xsct_HTPsD4,40774
33
- natural_pdf/core/page.py,sha256=kQKKqsbOaNeLhW3ark6mueDS-4tsopJcGcoMmKPK6B8,125624
34
- natural_pdf/core/pdf.py,sha256=YfniZp54AyptzMyr7ZP8n617n4wlV28SPrajt32nNBk,74233
30
+ natural_pdf/core/page.py,sha256=k4jezvsLqL07Raglc-rZmMnsVwBMo_A_OerklpBIejY,129477
31
+ natural_pdf/core/pdf.py,sha256=u0ZCPuIijNecU-AJHLvqfAYVCr9h7MgUKnlEtH6RoZI,75969
35
32
  natural_pdf/describe/__init__.py,sha256=B3zjuHjFI_dFuBLgXR1Q4v7c72fVDyk84d2hs0H4KV8,561
36
33
  natural_pdf/describe/base.py,sha256=HaWlHltb-dw6ug4mfR_iBLHWxr1OdPwLaUshXRxO7gg,18462
37
34
  natural_pdf/describe/elements.py,sha256=COvKF3B_RbAxXl5ORJDubV4C5PsiuSfuzD0ufPIJTFM,12983
@@ -39,11 +36,11 @@ natural_pdf/describe/mixin.py,sha256=U0x6v8r57KQb8qC3VVo64hvhfXQWsti8vdKBM7AXnMo
39
36
  natural_pdf/describe/summary.py,sha256=7FIF3zF6bzNx-gx4pCJr2XQFKiVzOEDnWsAYQ_mr9L0,7982
40
37
  natural_pdf/elements/__init__.py,sha256=S8XeiNWJ1WcgnyYKdYV1yxQlAxCCO3FfITT8MQwNbyk,41
41
38
  natural_pdf/elements/base.py,sha256=VshU4RstdzONJFq_8UVIjT_lVOai0MwMFsSFrCN-IO8,47299
42
- natural_pdf/elements/collections.py,sha256=52Oac96svzm_QMJcVaItnCG9P7d6JMNiGEx9lHgDEQg,125915
39
+ natural_pdf/elements/collections.py,sha256=1E2MSg2NNcEcoRM2rumrv_CqIdO7DgbRHYEtfw35FaQ,128457
43
40
  natural_pdf/elements/image.py,sha256=UjHNzCgDzOseQmLpkKshcxg51DPmWNIAVYxZ0TAMyUI,1423
44
41
  natural_pdf/elements/line.py,sha256=aQm4pDdlQSDAAXqrdg4AU-oTl9JCXgYuaJN0EYls6E0,4920
45
42
  natural_pdf/elements/rect.py,sha256=kiVa3e377ZnqIOXc89d9ZSY4EcmDxtccdtUw-HOQzpw,3796
46
- natural_pdf/elements/region.py,sha256=v1PzWvQoGHGdn7SQiPf4Oq3hIGueIfYGwcZ05ZU6XPE,127692
43
+ natural_pdf/elements/region.py,sha256=23J5Tv7ffAgz3IBgDXPq9Ab_lLg2Sog7elFRb6nvvZE,140541
47
44
  natural_pdf/elements/text.py,sha256=kw7u2KfHtDB905YawP7Hs89kcR8XnbtpkYQGEk6LNyk,18860
48
45
  natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
49
46
  natural_pdf/exporters/__init__.py,sha256=g1WRPCDVzceaUUsm8dchPhzdHFSjYM0NfRyc8iN0mtE,644
@@ -100,13 +97,12 @@ natural_pdf/utils/text_extraction.py,sha256=mDeN1_VevNi3RwvFe48PM5vBh-A5WeBlYgP6
100
97
  natural_pdf/utils/visualization.py,sha256=n3IZpbY5cf9LItzGavBcNyVZZrrUVxjYnmqZHYPa7NU,9386
101
98
  natural_pdf/widgets/__init__.py,sha256=QTVaUmsw__FCweFYZebwPssQxxUFUMd0wpm_cUbGZJY,181
102
99
  natural_pdf/widgets/viewer.py,sha256=2VUY1TzWMDe9I-IVNOosKZ2LaqpjLB62ftMAdk-s6_8,24952
103
- natural_pdf-0.1.31.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
100
+ natural_pdf-0.1.33.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
104
101
  optimization/memory_comparison.py,sha256=XEHtjduSmzXzxnsJMvemTcq-OAlvGUBAm5wwnOXq8TY,6524
105
102
  optimization/pdf_analyzer.py,sha256=G3XWhsEqIYbohEgTqz6wzxkAnOx4MkbvbSspx577-8w,19145
106
103
  optimization/performance_analysis.py,sha256=vVlFDywEXxhJLd9n2KVVqqQnS6rwWoHV_jlogboGF2k,13784
107
104
  optimization/test_cleanup_methods.py,sha256=B_zHiJr1hI8q-tdfBoFi0Jf5lj2PURjA_6teRBGoz8o,6277
108
105
  optimization/test_memory_fix.py,sha256=CWc0OSvFfKE0-nxqJOi_HAQc0GXUPKzkQbTeJp5UqxU,6364
109
- tools/rtl_smoke_test.py,sha256=-ogcbvNzumJasICP0NNQHk4Zb4M1VRx0TnGkJUQC7SM,3043
110
106
  tools/bad_pdf_eval/__init__.py,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
111
107
  tools/bad_pdf_eval/analyser.py,sha256=sR31aVVmTXRHS8uwLZXlPefTH2_lskxtAzuZwlhsyOo,13391
112
108
  tools/bad_pdf_eval/collate_summaries.py,sha256=Mcmf1OvVn0S0efj5ypk0syXKSrfUf6L5dowoGvOTgjU,5047
@@ -115,8 +111,8 @@ tools/bad_pdf_eval/export_enrichment_csv.py,sha256=SMEm9WxFUN_RIf8AGfZfjGEmvBvrO
115
111
  tools/bad_pdf_eval/llm_enrich.py,sha256=PsFMymPc8BNck21T3vupTN18pLdum-A_OLoJEKr6f80,12234
116
112
  tools/bad_pdf_eval/reporter.py,sha256=LIhcguDZ5XKgb0WeJsyA7m0kcliebOohzveShvt_KmY,400
117
113
  tools/bad_pdf_eval/utils.py,sha256=FuxaPX6f26IjQXu1vP0a2i9h1jgJNbASb8mRyj5-elE,4849
118
- natural_pdf-0.1.31.dist-info/METADATA,sha256=tqimu2ZReyYu5pS0PsbCo-Z9fIzkpMj1ljGPNbaOFss,6711
119
- natural_pdf-0.1.31.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
120
- natural_pdf-0.1.31.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
121
- natural_pdf-0.1.31.dist-info/top_level.txt,sha256=oZlRzSc3nZ9sV3L6kD_Di734Pp62ANrm46imFVa51qQ,58
122
- natural_pdf-0.1.31.dist-info/RECORD,,
114
+ natural_pdf-0.1.33.dist-info/METADATA,sha256=mSAwh3vuD9aRvO_AC_XBZG5sw9SeiuidC86a7kuV--I,6711
115
+ natural_pdf-0.1.33.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
116
+ natural_pdf-0.1.33.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
117
+ natural_pdf-0.1.33.dist-info/top_level.txt,sha256=oZlRzSc3nZ9sV3L6kD_Di734Pp62ANrm46imFVa51qQ,58
118
+ natural_pdf-0.1.33.dist-info/RECORD,,