natural-pdf 0.1.13__py3-none-any.whl → 0.1.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/analyzers/shape_detection_mixin.py +554 -273
- natural_pdf/core/page.py +127 -13
- natural_pdf/elements/base.py +20 -20
- natural_pdf/elements/region.py +167 -33
- natural_pdf/flows/element.py +2 -2
- {natural_pdf-0.1.13.dist-info → natural_pdf-0.1.15.dist-info}/METADATA +1 -1
- {natural_pdf-0.1.13.dist-info → natural_pdf-0.1.15.dist-info}/RECORD +10 -10
- {natural_pdf-0.1.13.dist-info → natural_pdf-0.1.15.dist-info}/WHEEL +0 -0
- {natural_pdf-0.1.13.dist-info → natural_pdf-0.1.15.dist-info}/licenses/LICENSE +0 -0
- {natural_pdf-0.1.13.dist-info → natural_pdf-0.1.15.dist-info}/top_level.txt +0 -0
natural_pdf/elements/region.py
CHANGED
@@ -106,7 +106,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
106
106
|
direction: str,
|
107
107
|
size: Optional[float] = None,
|
108
108
|
cross_size: str = "full",
|
109
|
-
|
109
|
+
include_source: bool = False,
|
110
110
|
until: Optional[str] = None,
|
111
111
|
include_endpoint: bool = True,
|
112
112
|
**kwargs,
|
@@ -118,7 +118,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
118
118
|
direction: 'left', 'right', 'above', or 'below'
|
119
119
|
size: Size in the primary direction (width for horizontal, height for vertical)
|
120
120
|
cross_size: Size in the cross direction ('full' or 'element')
|
121
|
-
|
121
|
+
include_source: Whether to include this region's area in the result
|
122
122
|
until: Optional selector string to specify a boundary element
|
123
123
|
include_endpoint: Whether to include the boundary element found by 'until'
|
124
124
|
**kwargs: Additional parameters for the 'until' selector search
|
@@ -132,7 +132,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
132
132
|
is_positive = direction in ("right", "below") # right/below are positive directions
|
133
133
|
pixel_offset = 1 # Offset for excluding elements/endpoints
|
134
134
|
|
135
|
-
# 1. Determine initial boundaries based on direction and
|
135
|
+
# 1. Determine initial boundaries based on direction and include_source
|
136
136
|
if is_horizontal:
|
137
137
|
# Initial cross-boundaries (vertical)
|
138
138
|
y0 = 0 if cross_size == "full" else self.top
|
@@ -140,11 +140,11 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
140
140
|
|
141
141
|
# Initial primary boundaries (horizontal)
|
142
142
|
if is_positive: # right
|
143
|
-
x0_initial = self.x0 if
|
143
|
+
x0_initial = self.x0 if include_source else self.x1 + pixel_offset
|
144
144
|
x1_initial = self.x1 # This edge moves
|
145
145
|
else: # left
|
146
146
|
x0_initial = self.x0 # This edge moves
|
147
|
-
x1_initial = self.x1 if
|
147
|
+
x1_initial = self.x1 if include_source else self.x0 - pixel_offset
|
148
148
|
else: # Vertical
|
149
149
|
# Initial cross-boundaries (horizontal)
|
150
150
|
x0 = 0 if cross_size == "full" else self.x0
|
@@ -152,11 +152,11 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
152
152
|
|
153
153
|
# Initial primary boundaries (vertical)
|
154
154
|
if is_positive: # below
|
155
|
-
y0_initial = self.top if
|
155
|
+
y0_initial = self.top if include_source else self.bottom + pixel_offset
|
156
156
|
y1_initial = self.bottom # This edge moves
|
157
157
|
else: # above
|
158
158
|
y0_initial = self.top # This edge moves
|
159
|
-
y1_initial = self.bottom if
|
159
|
+
y1_initial = self.bottom if include_source else self.top - pixel_offset
|
160
160
|
|
161
161
|
# 2. Calculate the final primary boundary, considering 'size' or page limits
|
162
162
|
if is_horizontal:
|
@@ -248,7 +248,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
248
248
|
# 5. Create and return Region
|
249
249
|
region = Region(self.page, final_bbox)
|
250
250
|
region.source_element = self
|
251
|
-
region.includes_source =
|
251
|
+
region.includes_source = include_source
|
252
252
|
# Optionally store the boundary element if found
|
253
253
|
if target:
|
254
254
|
region.boundary_element = target
|
@@ -259,7 +259,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
259
259
|
self,
|
260
260
|
height: Optional[float] = None,
|
261
261
|
width: str = "full",
|
262
|
-
|
262
|
+
include_source: bool = False,
|
263
263
|
until: Optional[str] = None,
|
264
264
|
include_endpoint: bool = True,
|
265
265
|
**kwargs,
|
@@ -270,7 +270,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
270
270
|
Args:
|
271
271
|
height: Height of the region above, in points
|
272
272
|
width: Width mode - "full" for full page width or "element" for element width
|
273
|
-
|
273
|
+
include_source: Whether to include this region in the result (default: False)
|
274
274
|
until: Optional selector string to specify an upper boundary element
|
275
275
|
include_endpoint: Whether to include the boundary element in the region (default: True)
|
276
276
|
**kwargs: Additional parameters
|
@@ -282,7 +282,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
282
282
|
direction="above",
|
283
283
|
size=height,
|
284
284
|
cross_size=width,
|
285
|
-
|
285
|
+
include_source=include_source,
|
286
286
|
until=until,
|
287
287
|
include_endpoint=include_endpoint,
|
288
288
|
**kwargs,
|
@@ -292,7 +292,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
292
292
|
self,
|
293
293
|
height: Optional[float] = None,
|
294
294
|
width: str = "full",
|
295
|
-
|
295
|
+
include_source: bool = False,
|
296
296
|
until: Optional[str] = None,
|
297
297
|
include_endpoint: bool = True,
|
298
298
|
**kwargs,
|
@@ -303,7 +303,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
303
303
|
Args:
|
304
304
|
height: Height of the region below, in points
|
305
305
|
width: Width mode - "full" for full page width or "element" for element width
|
306
|
-
|
306
|
+
include_source: Whether to include this region in the result (default: False)
|
307
307
|
until: Optional selector string to specify a lower boundary element
|
308
308
|
include_endpoint: Whether to include the boundary element in the region (default: True)
|
309
309
|
**kwargs: Additional parameters
|
@@ -315,7 +315,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
315
315
|
direction="below",
|
316
316
|
size=height,
|
317
317
|
cross_size=width,
|
318
|
-
|
318
|
+
include_source=include_source,
|
319
319
|
until=until,
|
320
320
|
include_endpoint=include_endpoint,
|
321
321
|
**kwargs,
|
@@ -325,7 +325,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
325
325
|
self,
|
326
326
|
width: Optional[float] = None,
|
327
327
|
height: str = "full",
|
328
|
-
|
328
|
+
include_source: bool = False,
|
329
329
|
until: Optional[str] = None,
|
330
330
|
include_endpoint: bool = True,
|
331
331
|
**kwargs,
|
@@ -336,7 +336,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
336
336
|
Args:
|
337
337
|
width: Width of the region to the left, in points
|
338
338
|
height: Height mode - "full" for full page height or "element" for element height
|
339
|
-
|
339
|
+
include_source: Whether to include this region in the result (default: False)
|
340
340
|
until: Optional selector string to specify a left boundary element
|
341
341
|
include_endpoint: Whether to include the boundary element in the region (default: True)
|
342
342
|
**kwargs: Additional parameters
|
@@ -348,7 +348,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
348
348
|
direction="left",
|
349
349
|
size=width,
|
350
350
|
cross_size=height,
|
351
|
-
|
351
|
+
include_source=include_source,
|
352
352
|
until=until,
|
353
353
|
include_endpoint=include_endpoint,
|
354
354
|
**kwargs,
|
@@ -358,7 +358,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
358
358
|
self,
|
359
359
|
width: Optional[float] = None,
|
360
360
|
height: str = "full",
|
361
|
-
|
361
|
+
include_source: bool = False,
|
362
362
|
until: Optional[str] = None,
|
363
363
|
include_endpoint: bool = True,
|
364
364
|
**kwargs,
|
@@ -369,7 +369,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
369
369
|
Args:
|
370
370
|
width: Width of the region to the right, in points
|
371
371
|
height: Height mode - "full" for full page height or "element" for element height
|
372
|
-
|
372
|
+
include_source: Whether to include this region in the result (default: False)
|
373
373
|
until: Optional selector string to specify a right boundary element
|
374
374
|
include_endpoint: Whether to include the boundary element in the region (default: True)
|
375
375
|
**kwargs: Additional parameters
|
@@ -381,7 +381,7 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
381
381
|
direction="right",
|
382
382
|
size=width,
|
383
383
|
cross_size=height,
|
384
|
-
|
384
|
+
include_source=include_source,
|
385
385
|
until=until,
|
386
386
|
include_endpoint=include_endpoint,
|
387
387
|
**kwargs,
|
@@ -1247,8 +1247,12 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
1247
1247
|
Extract a table from this region.
|
1248
1248
|
|
1249
1249
|
Args:
|
1250
|
-
method: Method to use: 'tatr', '
|
1251
|
-
|
1250
|
+
method: Method to use: 'tatr', 'pdfplumber', 'text', 'stream', 'lattice', or None (auto-detect).
|
1251
|
+
'stream' is an alias for 'pdfplumber' with text-based strategies (equivalent to
|
1252
|
+
setting `vertical_strategy` and `horizontal_strategy` to 'text').
|
1253
|
+
'lattice' is an alias for 'pdfplumber' with line-based strategies (equivalent to
|
1254
|
+
setting `vertical_strategy` and `horizontal_strategy` to 'lines').
|
1255
|
+
table_settings: Settings for pdfplumber table extraction (used with 'pdfplumber', 'stream', or 'lattice' methods).
|
1252
1256
|
use_ocr: Whether to use OCR for text extraction (currently only applicable with 'tatr' method).
|
1253
1257
|
ocr_config: OCR configuration parameters.
|
1254
1258
|
text_options: Dictionary of options for the 'text' method, corresponding to arguments
|
@@ -1268,13 +1272,47 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
1268
1272
|
text_options = {} # Initialize empty dict
|
1269
1273
|
|
1270
1274
|
# Auto-detect method if not specified
|
1271
|
-
|
1272
|
-
if effective_method is None:
|
1275
|
+
if method is None:
|
1273
1276
|
# If this is a TATR-detected region, use TATR method
|
1274
1277
|
if hasattr(self, "model") and self.model == "tatr" and self.region_type == "table":
|
1275
1278
|
effective_method = "tatr"
|
1276
1279
|
else:
|
1277
|
-
|
1280
|
+
# Try lattice first, then fall back to stream if no meaningful results
|
1281
|
+
logger.debug(f"Region {self.bbox}: Auto-detecting table extraction method...")
|
1282
|
+
|
1283
|
+
try:
|
1284
|
+
logger.debug(f"Region {self.bbox}: Trying 'lattice' method first...")
|
1285
|
+
lattice_result = self.extract_table('lattice', table_settings=table_settings.copy())
|
1286
|
+
|
1287
|
+
# Check if lattice found meaningful content
|
1288
|
+
if (lattice_result and len(lattice_result) > 0 and
|
1289
|
+
any(any(cell and cell.strip() for cell in row if cell) for row in lattice_result)):
|
1290
|
+
logger.debug(f"Region {self.bbox}: 'lattice' method found table with {len(lattice_result)} rows")
|
1291
|
+
return lattice_result
|
1292
|
+
else:
|
1293
|
+
logger.debug(f"Region {self.bbox}: 'lattice' method found no meaningful content")
|
1294
|
+
except Exception as e:
|
1295
|
+
logger.debug(f"Region {self.bbox}: 'lattice' method failed: {e}")
|
1296
|
+
|
1297
|
+
# Fall back to stream
|
1298
|
+
logger.debug(f"Region {self.bbox}: Falling back to 'stream' method...")
|
1299
|
+
return self.extract_table('stream', table_settings=table_settings.copy())
|
1300
|
+
else:
|
1301
|
+
effective_method = method
|
1302
|
+
|
1303
|
+
# Handle method aliases for pdfplumber
|
1304
|
+
if effective_method == "stream":
|
1305
|
+
logger.debug("Using 'stream' method alias for 'pdfplumber' with text-based strategies.")
|
1306
|
+
effective_method = "pdfplumber"
|
1307
|
+
# Set default text strategies if not already provided by the user
|
1308
|
+
table_settings.setdefault("vertical_strategy", "text")
|
1309
|
+
table_settings.setdefault("horizontal_strategy", "text")
|
1310
|
+
elif effective_method == "lattice":
|
1311
|
+
logger.debug("Using 'lattice' method alias for 'pdfplumber' with line-based strategies.")
|
1312
|
+
effective_method = "pdfplumber"
|
1313
|
+
# Set default line strategies if not already provided by the user
|
1314
|
+
table_settings.setdefault("vertical_strategy", "lines")
|
1315
|
+
table_settings.setdefault("horizontal_strategy", "lines")
|
1278
1316
|
|
1279
1317
|
logger.debug(f"Region {self.bbox}: Extracting table using method '{effective_method}'")
|
1280
1318
|
|
@@ -1284,19 +1322,115 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
1284
1322
|
elif effective_method == "text":
|
1285
1323
|
current_text_options = text_options.copy()
|
1286
1324
|
current_text_options["cell_extraction_func"] = cell_extraction_func
|
1287
|
-
# --- Pass show_progress to the helper --- #
|
1288
1325
|
current_text_options["show_progress"] = show_progress
|
1289
1326
|
return self._extract_table_text(**current_text_options)
|
1290
|
-
elif effective_method == "
|
1327
|
+
elif effective_method == "pdfplumber":
|
1291
1328
|
return self._extract_table_plumber(table_settings)
|
1292
1329
|
else:
|
1293
1330
|
raise ValueError(
|
1294
|
-
f"Unknown table extraction method: '{
|
1331
|
+
f"Unknown table extraction method: '{method}'. Choose from 'tatr', 'pdfplumber', 'text', 'stream', 'lattice'."
|
1295
1332
|
)
|
1296
1333
|
|
1334
|
+
|
1335
|
+
def extract_tables(
|
1336
|
+
self,
|
1337
|
+
method: Optional[str] = None,
|
1338
|
+
table_settings: Optional[dict] = None,
|
1339
|
+
) -> List[List[List[str]]]:
|
1340
|
+
"""
|
1341
|
+
Extract all tables from this region using pdfplumber-based methods.
|
1342
|
+
|
1343
|
+
Note: Only 'pdfplumber', 'stream', and 'lattice' methods are supported for extract_tables.
|
1344
|
+
'tatr' and 'text' methods are designed for single table extraction only.
|
1345
|
+
|
1346
|
+
Args:
|
1347
|
+
method: Method to use: 'pdfplumber', 'stream', 'lattice', or None (auto-detect).
|
1348
|
+
'stream' uses text-based strategies, 'lattice' uses line-based strategies.
|
1349
|
+
table_settings: Settings for pdfplumber table extraction.
|
1350
|
+
|
1351
|
+
Returns:
|
1352
|
+
List of tables, where each table is a list of rows, and each row is a list of cell values.
|
1353
|
+
"""
|
1354
|
+
if table_settings is None:
|
1355
|
+
table_settings = {}
|
1356
|
+
|
1357
|
+
# Auto-detect method if not specified (try lattice first, then stream)
|
1358
|
+
if method is None:
|
1359
|
+
logger.debug(f"Region {self.bbox}: Auto-detecting tables extraction method...")
|
1360
|
+
|
1361
|
+
# Try lattice first
|
1362
|
+
try:
|
1363
|
+
lattice_settings = table_settings.copy()
|
1364
|
+
lattice_settings.setdefault("vertical_strategy", "lines")
|
1365
|
+
lattice_settings.setdefault("horizontal_strategy", "lines")
|
1366
|
+
|
1367
|
+
logger.debug(f"Region {self.bbox}: Trying 'lattice' method first for tables...")
|
1368
|
+
lattice_result = self._extract_tables_plumber(lattice_settings)
|
1369
|
+
|
1370
|
+
# Check if lattice found meaningful tables
|
1371
|
+
if (lattice_result and len(lattice_result) > 0 and
|
1372
|
+
any(any(any(cell and cell.strip() for cell in row if cell) for row in table if table) for table in lattice_result)):
|
1373
|
+
logger.debug(f"Region {self.bbox}: 'lattice' method found {len(lattice_result)} tables")
|
1374
|
+
return lattice_result
|
1375
|
+
else:
|
1376
|
+
logger.debug(f"Region {self.bbox}: 'lattice' method found no meaningful tables")
|
1377
|
+
|
1378
|
+
except Exception as e:
|
1379
|
+
logger.debug(f"Region {self.bbox}: 'lattice' method failed: {e}")
|
1380
|
+
|
1381
|
+
# Fall back to stream
|
1382
|
+
logger.debug(f"Region {self.bbox}: Falling back to 'stream' method for tables...")
|
1383
|
+
stream_settings = table_settings.copy()
|
1384
|
+
stream_settings.setdefault("vertical_strategy", "text")
|
1385
|
+
stream_settings.setdefault("horizontal_strategy", "text")
|
1386
|
+
|
1387
|
+
return self._extract_tables_plumber(stream_settings)
|
1388
|
+
|
1389
|
+
effective_method = method
|
1390
|
+
|
1391
|
+
# Handle method aliases
|
1392
|
+
if effective_method == "stream":
|
1393
|
+
logger.debug("Using 'stream' method alias for 'pdfplumber' with text-based strategies.")
|
1394
|
+
effective_method = "pdfplumber"
|
1395
|
+
table_settings.setdefault("vertical_strategy", "text")
|
1396
|
+
table_settings.setdefault("horizontal_strategy", "text")
|
1397
|
+
elif effective_method == "lattice":
|
1398
|
+
logger.debug("Using 'lattice' method alias for 'pdfplumber' with line-based strategies.")
|
1399
|
+
effective_method = "pdfplumber"
|
1400
|
+
table_settings.setdefault("vertical_strategy", "lines")
|
1401
|
+
table_settings.setdefault("horizontal_strategy", "lines")
|
1402
|
+
|
1403
|
+
# Use the selected method
|
1404
|
+
if effective_method == "pdfplumber":
|
1405
|
+
return self._extract_tables_plumber(table_settings)
|
1406
|
+
else:
|
1407
|
+
raise ValueError(
|
1408
|
+
f"Unknown tables extraction method: '{method}'. Choose from 'pdfplumber', 'stream', 'lattice'."
|
1409
|
+
)
|
1410
|
+
|
1411
|
+
def _extract_tables_plumber(self, table_settings: dict) -> List[List[List[str]]]:
|
1412
|
+
"""
|
1413
|
+
Extract all tables using pdfplumber's table extraction.
|
1414
|
+
|
1415
|
+
Args:
|
1416
|
+
table_settings: Settings for pdfplumber table extraction
|
1417
|
+
|
1418
|
+
Returns:
|
1419
|
+
List of tables, where each table is a list of rows, and each row is a list of cell values
|
1420
|
+
"""
|
1421
|
+
# Create a crop of the page for this region
|
1422
|
+
cropped = self.page._page.crop(self.bbox)
|
1423
|
+
|
1424
|
+
# Extract all tables from the cropped area
|
1425
|
+
tables = cropped.extract_tables(table_settings)
|
1426
|
+
|
1427
|
+
# Return the tables or an empty list if none found
|
1428
|
+
return tables if tables else []
|
1429
|
+
|
1297
1430
|
def _extract_table_plumber(self, table_settings: dict) -> List[List[str]]:
|
1298
1431
|
"""
|
1299
1432
|
Extract table using pdfplumber's table extraction.
|
1433
|
+
This method extracts the largest table within the region.
|
1300
1434
|
|
1301
1435
|
Args:
|
1302
1436
|
table_settings: Settings for pdfplumber table extraction
|
@@ -1307,12 +1441,12 @@ class Region(DirectionalMixin, ClassificationMixin, ExtractionMixin, ShapeDetect
|
|
1307
1441
|
# Create a crop of the page for this region
|
1308
1442
|
cropped = self.page._page.crop(self.bbox)
|
1309
1443
|
|
1310
|
-
# Extract table from the cropped area
|
1311
|
-
|
1444
|
+
# Extract the single largest table from the cropped area
|
1445
|
+
table = cropped.extract_table(table_settings)
|
1312
1446
|
|
1313
|
-
# Return the
|
1314
|
-
if
|
1315
|
-
return
|
1447
|
+
# Return the table or an empty list if none found
|
1448
|
+
if table:
|
1449
|
+
return table
|
1316
1450
|
return []
|
1317
1451
|
|
1318
1452
|
def _extract_table_tatr(self, use_ocr=False, ocr_config=None) -> List[List[str]]:
|
natural_pdf/flows/element.py
CHANGED
@@ -175,7 +175,7 @@ class FlowElement:
|
|
175
175
|
if current_segment_idx == start_segment_index:
|
176
176
|
op_source = self.physical_object
|
177
177
|
op_direction_params["size"] = remaining_size if size is not None else None
|
178
|
-
op_direction_params["
|
178
|
+
op_direction_params["include_source"] = False
|
179
179
|
|
180
180
|
source_for_op_call = op_source
|
181
181
|
if not isinstance(source_for_op_call, PhysicalRegion_Class):
|
@@ -191,7 +191,7 @@ class FlowElement:
|
|
191
191
|
"size": remaining_size if size is not None else None,
|
192
192
|
"cross_size": cross_size_for_op,
|
193
193
|
"cross_alignment": cross_alignment, # Pass alignment
|
194
|
-
"
|
194
|
+
"include_source": False,
|
195
195
|
# Pass other relevant kwargs if Region._direction uses them (e.g. strict_type)
|
196
196
|
**{k: v for k, v in kwargs.items() if k in ['strict_type', 'first_match_only']}
|
197
197
|
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
natural_pdf/__init__.py,sha256=0sCYgb9BAV5OnpD_1AswMuOLuXNmpe3OLJpv_6p3tgw,2449
|
2
2
|
natural_pdf/analyzers/__init__.py,sha256=dIXjsMqoxKmd9OOnSBzn12wvdIz7D7YNQRAnXslpJSM,142
|
3
|
-
natural_pdf/analyzers/shape_detection_mixin.py,sha256=
|
3
|
+
natural_pdf/analyzers/shape_detection_mixin.py,sha256=6IXKuifC2QDurW8_gMakZCetTHSdOUK5Ye_B-P4EQMA,75381
|
4
4
|
natural_pdf/analyzers/text_options.py,sha256=qEkDaYWla0rIM_gszEOsu52q7C_dAfV81P2HLJZM2sw,3333
|
5
5
|
natural_pdf/analyzers/text_structure.py,sha256=VfKTsTFrK877sC0grsis9jK3rrgp0Mbp13VWEbukTcs,28437
|
6
6
|
natural_pdf/analyzers/utils.py,sha256=PYbzJzSAHZ7JsMes84WIrSbA0zkjJGs0CLvIeINsf_k,2100
|
@@ -24,14 +24,14 @@ natural_pdf/collections/pdf_collection.py,sha256=nsbrzcsXAD2qVLLXhDYpljAb_WnjMNa
|
|
24
24
|
natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
|
25
25
|
natural_pdf/core/element_manager.py,sha256=_UdXu51sLi6STzc8Pj4k8R721G3yJixXDLuRHn3hmr8,25731
|
26
26
|
natural_pdf/core/highlighting_service.py,sha256=tjMJpdJj2oaMGpdqiNHPcTJqID4nd-uBZ5v7KtPmoc0,36762
|
27
|
-
natural_pdf/core/page.py,sha256=
|
27
|
+
natural_pdf/core/page.py,sha256=M-KgTxceFebw0n1BehFAeQ0sxnCpIr9dZX10k2OJzUY,117518
|
28
28
|
natural_pdf/core/pdf.py,sha256=395aBTg4Le4vABvQWgBhPm669nGJ8JdMToTs1UtQ2Vg,69575
|
29
29
|
natural_pdf/elements/__init__.py,sha256=S8XeiNWJ1WcgnyYKdYV1yxQlAxCCO3FfITT8MQwNbyk,41
|
30
|
-
natural_pdf/elements/base.py,sha256=
|
30
|
+
natural_pdf/elements/base.py,sha256=NNF-iUzkip0UgfKTuqLE1jVJsq2yD7LUTvOQWMi_Jpc,39631
|
31
31
|
natural_pdf/elements/collections.py,sha256=qd58tD3f-eojz90ICytlqu4Ej0OQoWgsxV4umQDhUvA,120809
|
32
32
|
natural_pdf/elements/line.py,sha256=300kSFBDUBIudfeQtH_tzW9gTYRgRKUDPiTABw6J-BE,4782
|
33
33
|
natural_pdf/elements/rect.py,sha256=kiVa3e377ZnqIOXc89d9ZSY4EcmDxtccdtUw-HOQzpw,3796
|
34
|
-
natural_pdf/elements/region.py,sha256=
|
34
|
+
natural_pdf/elements/region.py,sha256=l9J6E7bAkxZoA603cfPKG1LuU7uRUPl4PArUBkuk7VI,122719
|
35
35
|
natural_pdf/elements/text.py,sha256=13HvVZGinj2Vm_fFCAnqi7hohtoKvnpCp3VCfkpeAbc,11146
|
36
36
|
natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
|
37
37
|
natural_pdf/exporters/__init__.py,sha256=7MnvRLLQdwtg-ULu-8uK8C84GsKiJamyhRw_GgWhw7k,151
|
@@ -49,7 +49,7 @@ natural_pdf/extraction/mixin.py,sha256=eKbr70VibpbtfjvCE80lTFuYHzq_BoVtOHjznL_GM
|
|
49
49
|
natural_pdf/extraction/result.py,sha256=c1vLguCR6l95cvg-BJJmZvL_MPg2McJaczge55bKZMg,934
|
50
50
|
natural_pdf/flows/__init__.py,sha256=82ibI0eNJfVergEsTyom9Nxe_T6pnWQsr4-CISGQlz0,277
|
51
51
|
natural_pdf/flows/collections.py,sha256=iOmRqM5K74kqioh7-UAbNgkpXMr9nkZZ5oW4_sQ1Alo,26433
|
52
|
-
natural_pdf/flows/element.py,sha256=
|
52
|
+
natural_pdf/flows/element.py,sha256=HMlSBjnQH3CF89wDwc0qBpAfmMmHHK_5LeY1TYeqtTs,20564
|
53
53
|
natural_pdf/flows/flow.py,sha256=ft07Ou0uRodF_gTgumVlU9YUquE3LTZz5LEAoQGErEs,10375
|
54
54
|
natural_pdf/flows/region.py,sha256=5xAnePZjs292oKrGG5El3pwhpxaHQYLzse35ilswhqI,21298
|
55
55
|
natural_pdf/ocr/__init__.py,sha256=VY8hhvDPf7Gh2lB-d2QRmghLLyTy6ydxlgo1cS4dOSk,2482
|
@@ -84,8 +84,8 @@ natural_pdf/utils/text_extraction.py,sha256=z6Jhy11pakYCsEpkvh8ldw6DkUFsYF1hCL9Y
|
|
84
84
|
natural_pdf/utils/visualization.py,sha256=30pRWQdsRJh2pSObh-brKVsFgC1n8tHmSrta_UDnVPw,8989
|
85
85
|
natural_pdf/widgets/__init__.py,sha256=O2fSDo604wDAP6UwUkmBq3eT91RSqHwBpAOQXq92S8s,214
|
86
86
|
natural_pdf/widgets/viewer.py,sha256=ekgXTEfA48GrR-JjpCpgyBCXdf4IubV0pAXDJozcU7A,39196
|
87
|
-
natural_pdf-0.1.
|
88
|
-
natural_pdf-0.1.
|
89
|
-
natural_pdf-0.1.
|
90
|
-
natural_pdf-0.1.
|
91
|
-
natural_pdf-0.1.
|
87
|
+
natural_pdf-0.1.15.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
|
88
|
+
natural_pdf-0.1.15.dist-info/METADATA,sha256=O8RUOiFgln7unuRhKey0Z6l90K71ktMY7WwpaiEyZdc,7674
|
89
|
+
natural_pdf-0.1.15.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
90
|
+
natural_pdf-0.1.15.dist-info/top_level.txt,sha256=Cyw1zmNDlUZfb5moU-WUWGprrwH7ln_8LDGdmMHF1xI,17
|
91
|
+
natural_pdf-0.1.15.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|