natural-pdf 0.1.23__py3-none-any.whl → 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -119,29 +119,52 @@ class DocumentQA:
  def ask(
  self,
  image: Union[str, Image.Image, np.ndarray],
- question: str,
+ question: Union[str, List[str], Tuple[str, ...]],
  word_boxes: List = None,
  min_confidence: float = 0.1,
  debug: bool = False,
  debug_output_dir: str = "output",
- ) -> QAResult:
+ ) -> Union[QAResult, List[QAResult]]:
  """
- Ask a question about document content.
+ Ask one or more natural-language questions about the supplied document image.
+
+ This method now accepts a single *question* (``str``) **or** an
+ iterable of questions (``list``/``tuple`` of ``str``). When multiple
+ questions are provided they are executed in a single batch through the
+ underlying transformers pipeline which is considerably faster than
+ looping and calling :py:meth:`ask` repeatedly.

  Args:
- image: PIL Image, numpy array, or path to image file
- question: Question to ask about the document
- word_boxes: Optional pre-extracted word boxes [[text, [x0, y0, x1, y1]], ...]
- min_confidence: Minimum confidence threshold for answers
- debug: Whether to save debug information
- debug_output_dir: Directory to save debug files
+ image: PIL ``Image``, ``numpy`` array, or path to an image file.
+ question: A question string *or* a list/tuple of question strings.
+ word_boxes: Optional pre-extracted word-boxes in the LayoutLMv3
+ format ``[[text, [x0, y0, x1, y1]], …]``.
+ min_confidence: Minimum confidence threshold below which an answer
+ will be marked as ``found = False``.
+ debug: If ``True`` intermediate artefacts will be written to
+ *debug_output_dir* to aid troubleshooting.
+ debug_output_dir: Directory where debug artefacts should be saved.

  Returns:
- QAResult instance with answer details
+ • A single :class:`QAResult` when *question* is a string.
+ • A ``list`` of :class:`QAResult`` objects (one per question) when
+ *question* is a list/tuple.
  """
  if not self._is_initialized:
  raise RuntimeError("DocumentQA is not properly initialized")

+ # Normalise *questions* to a list so we can treat batch and single
+ # uniformly. We'll remember if the caller supplied a single question
+ # so that we can preserve the original return type.
+ single_question = False
+ if isinstance(question, str):
+ questions = [question]
+ single_question = True
+ elif isinstance(question, (list, tuple)) and all(isinstance(q, str) for q in question):
+ questions = list(question)
+ else:
+ raise TypeError("'question' must be a string or a list/tuple of strings")
+
  # Process the image
  if isinstance(image, str):
  # It's a file path
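
The hunk above widens `ask()` to accept either a single question string or a batch of questions, while preserving the single-result return type for the string case. A minimal usage sketch under that assumption (the `qa` object is an already-initialized `DocumentQA` instance; its construction is not part of this diff):

```python
def ask_example(qa, image_path: str) -> None:
    """Sketch only: `qa` is assumed to be an initialized DocumentQA instance."""
    # A single question still returns a single QAResult.
    single = qa.ask(image=image_path, question="What is the invoice total?")
    print(single.found, single.answer, single.confidence)

    # A list of questions returns a list of QAResult, answered in one batched pipeline call.
    batch = qa.ask(
        image=image_path,
        question=["What is the invoice total?", "What is the invoice date?"],
    )
    for res in batch:
        print(res.answer if res.found else "(no confident answer)")
```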
@@ -157,12 +180,16 @@ class DocumentQA:
  else:
  raise TypeError("Image must be a PIL Image, numpy array, or file path")

- # Prepare the query
- query = {"image": image_obj, "question": question}
+ # ------------------------------------------------------------------
+ # Build the queries for the pipeline (either single dict or list).
+ # ------------------------------------------------------------------
+ def _build_query_dict(q: str):
+ d = {"image": image_obj, "question": q}
+ if word_boxes:
+ d["word_boxes"] = word_boxes
+ return d

- # Add word boxes if provided
- if word_boxes:
- query["word_boxes"] = word_boxes
+ queries = [_build_query_dict(q) for q in questions]

  # Save debug information if requested
  if debug:
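
For reference, the new `_build_query_dict` helper produces one dict per question, all sharing the same image and optional word boxes. A standalone sketch of the resulting structure (the file name and questions below are placeholders):

```python
from typing import Any, Dict, List, Optional

def build_queries(image_obj: Any, questions: List[str],
                  word_boxes: Optional[List[list]] = None) -> List[Dict[str, Any]]:
    # One query dict per question; image and word boxes are shared across the batch.
    queries = []
    for q in questions:
        d: Dict[str, Any] = {"image": image_obj, "question": q}
        if word_boxes:
            d["word_boxes"] = word_boxes
        queries.append(d)
    return queries

print(build_queries("page.png", ["What is the total?", "What is the date?"]))
```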
@@ -198,48 +225,79 @@ class DocumentQA:
  logger.info(f"Word boxes: {word_boxes_path}")
  logger.info(f"Visualization: {vis_path}")

- # Run the query through the pipeline
- logger.info(f"Running document QA pipeline with question: {question}")
- result = self.pipe(query)[0]
- logger.info(f"Raw result: {result}")
-
- # Save the result if debugging
- if debug:
- result_path = os.path.join(debug_output_dir, "debug_qa_result.json")
- with open(result_path, "w") as f:
- # Convert any non-serializable data
- serializable_result = {
- k: (
- str(v)
- if not isinstance(v, (str, int, float, bool, list, dict, type(None)))
- else v
- )
- for k, v in result.items()
- }
- json.dump(serializable_result, f, indent=2)
-
- # Check confidence against threshold
- if result["score"] < min_confidence:
- logger.info(f"Answer confidence {result['score']:.4f} below threshold {min_confidence}")
- return QAResult(
- answer="",
- confidence=result["score"],
- start=result.get("start", -1),
- end=result.get("end", -1),
- found=False,
- )
-
- return QAResult(
- answer=result["answer"],
- confidence=result["score"],
- start=result.get("start", 0),
- end=result.get("end", 0),
- found=True,
+ # ------------------------------------------------------------------
+ # Run the queries through the pipeline (batch or single) and collect
+ # *only the top answer* for each, mirroring the original behaviour.
+ # ------------------------------------------------------------------
+ logger.info(
+ f"Running document QA pipeline with {len(queries)} question{'s' if len(queries) != 1 else ''}."
  )

+ # When we pass a list the pipeline returns a list of per-question
+ # results; each per-question result is itself a list (top-k answers).
+ # We keep only the best answer (index 0) to maintain backwards
+ # compatibility.
+ raw_results = self.pipe(queries if len(queries) > 1 else queries[0])
+
+ # Ensure we always have a list aligned with *questions*
+ if len(queries) == 1:
+ raw_results = [raw_results]
+
+ processed_results: List[QAResult] = []
+
+ for q, res in zip(questions, raw_results):
+ top_res = res[0] if isinstance(res, list) else res # pipeline may or may not nest
+
+ # Save per-question result in debug mode
+ if debug:
+ # File names: debug_qa_result_0.json, …
+ result_path = os.path.join(debug_output_dir, f"debug_qa_result_{q[:30].replace(' ', '_')}.json")
+ try:
+ with open(result_path, "w") as f:
+ serializable = {
+ k: (
+ str(v)
+ if not isinstance(v, (str, int, float, bool, list, dict, type(None)))
+ else v
+ )
+ for k, v in top_res.items()
+ }
+ json.dump(serializable, f, indent=2)
+ except Exception as e:
+ logger.warning(f"Failed to save debug QA result for question '{q}': {e}")
+
+ # Apply confidence threshold
+ if top_res["score"] < min_confidence:
+ qa_res = QAResult(
+ question=q,
+ answer="",
+ confidence=top_res["score"],
+ start=top_res.get("start", -1),
+ end=top_res.get("end", -1),
+ found=False,
+ )
+ else:
+ qa_res = QAResult(
+ question=q,
+ answer=top_res["answer"],
+ confidence=top_res["score"],
+ start=top_res.get("start", 0),
+ end=top_res.get("end", 0),
+ found=True,
+ )
+
+ processed_results.append(qa_res)
+
+ # Return appropriately typed result (single item or list)
+ return processed_results[0] if single_question else processed_results
+
  def ask_pdf_page(
- self, page, question: str, min_confidence: float = 0.1, debug: bool = False
- ) -> QAResult:
+ self,
+ page,
+ question: Union[str, List[str], Tuple[str, ...]],
+ min_confidence: float = 0.1,
+ debug: bool = False,
+ ) -> Union[QAResult, List[QAResult]]:
  """
  Ask a question about a specific PDF page.

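The batch run keeps only the top answer per question and applies the same confidence threshold as before; answers below `min_confidence` come back with `found=False` and an empty answer string. A self-contained sketch of that thresholding, using a plain dict in place of `QAResult`:

```python
from typing import Any, Dict

def summarize_answer(question: str, top_res: Dict[str, Any], min_confidence: float = 0.1) -> Dict[str, Any]:
    # Mirror the threshold handling above on one raw pipeline answer dict.
    found = top_res["score"] >= min_confidence
    return {
        "question": question,
        "answer": top_res["answer"] if found else "",
        "confidence": top_res["score"],
        "start": top_res.get("start", 0 if found else -1),
        "end": top_res.get("end", 0 if found else -1),
        "found": found,
    }

print(summarize_answer("What is the total?", {"answer": "$42.00", "score": 0.93, "start": 10, "end": 11}))
print(summarize_answer("Who signed it?", {"answer": "J. Doe", "score": 0.04}))
```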
@@ -270,8 +328,8 @@ class DocumentQA:
  page_image.save(temp_path)

  try:
- # Ask the question
- result = self.ask(
+ # Ask the question(s)
+ result_obj = self.ask(
  image=temp_path,
  question=question,
  word_boxes=word_boxes,
@@ -279,34 +337,35 @@ class DocumentQA:
  debug=debug,
  )

- # Add page reference to the result
- result.page_num = page.index
+ # Ensure we have a list for uniform processing
+ results = result_obj if isinstance(result_obj, list) else [result_obj]

- # Add element references if possible
- if result.found and "start" in result and "end" in result:
- start_idx = result.start
- end_idx = result.end
+ for res in results:
+ # Attach page reference
+ res.page_num = page.index

- # Make sure we have valid indices and elements to work with
- if elements and 0 <= start_idx < len(word_boxes) and 0 <= end_idx < len(word_boxes):
- # Find the actual source elements in the original list
- # Since word_boxes may have filtered out some elements, we need to map indices
+ # Map answer span back to source elements
+ if res.found and "start" in res and "end" in res:
+ start_idx = res.start
+ end_idx = res.end

- # Get the text from result word boxes
- matched_texts = [wb[0] for wb in word_boxes[start_idx : end_idx + 1]]
+ if elements and 0 <= start_idx < len(word_boxes) and 0 <= end_idx < len(word_boxes):
+ matched_texts = [wb[0] for wb in word_boxes[start_idx : end_idx + 1]]

- # Find corresponding elements in the full element list
- source_elements = []
- for element in elements:
- if hasattr(element, "text") and element.text in matched_texts:
- source_elements.append(element)
- # Remove from matched texts to avoid duplicates
- if element.text in matched_texts:
- matched_texts.remove(element.text)
+ source_elements = []
+ for element in elements:
+ if hasattr(element, "text") and element.text in matched_texts:
+ source_elements.append(element)
+ if element.text in matched_texts:
+ matched_texts.remove(element.text)

- result.source_elements = ElementCollection(source_elements)
+ res.source_elements = ElementCollection(source_elements)

- return result
+ # Return result(s) preserving original input type
+ if isinstance(question, (list, tuple)):
+ return results
+ else:
+ return results[0]

  finally:
  # Clean up temporary file
@@ -314,8 +373,12 @@ class DocumentQA:
  os.remove(temp_path)

  def ask_pdf_region(
- self, region, question: str, min_confidence: float = 0.1, debug: bool = False
- ) -> QAResult:
+ self,
+ region,
+ question: Union[str, List[str], Tuple[str, ...]],
+ min_confidence: float = 0.1,
+ debug: bool = False,
+ ) -> Union[QAResult, List[QAResult]]:
  """
  Ask a question about a specific region of a PDF page.

@@ -352,8 +415,8 @@ class DocumentQA:
  region_image.save(temp_path)

  try:
- # Ask the question
- result = self.ask(
+ # Ask the question(s)
+ result_obj = self.ask(
  image=temp_path,
  question=question,
  word_boxes=word_boxes,
@@ -361,35 +424,29 @@ class DocumentQA:
  debug=debug,
  )

- # Add region reference to the result
- result.region = region
- result.page_num = region.page.index
+ results = result_obj if isinstance(result_obj, list) else [result_obj]

- # Add element references if possible
- if result.found and "start" in result and "end" in result:
- start_idx = result.start
- end_idx = result.end
+ for res in results:
+ res.region = region
+ res.page_num = region.page.index

- # Make sure we have valid indices and elements to work with
- if elements and 0 <= start_idx < len(word_boxes) and 0 <= end_idx < len(word_boxes):
- # Find the actual source elements in the original list
- # Since word_boxes may have filtered out some elements, we need to map indices
+ if res.found and "start" in res and "end" in res:
+ start_idx = res.start
+ end_idx = res.end

- # Get the text from result word boxes
- matched_texts = [wb[0] for wb in word_boxes[start_idx : end_idx + 1]]
+ if elements and 0 <= start_idx < len(word_boxes) and 0 <= end_idx < len(word_boxes):
+ matched_texts = [wb[0] for wb in word_boxes[start_idx : end_idx + 1]]

- # Find corresponding elements in the full element list
- source_elements = []
- for element in elements:
- if hasattr(element, "text") and element.text in matched_texts:
- source_elements.append(element)
- # Remove from matched texts to avoid duplicates
- if element.text in matched_texts:
- matched_texts.remove(element.text)
+ source_elements = []
+ for element in elements:
+ if hasattr(element, "text") and element.text in matched_texts:
+ source_elements.append(element)
+ if element.text in matched_texts:
+ matched_texts.remove(element.text)

- result.source_elements = ElementCollection(source_elements)
+ res.source_elements = ElementCollection(source_elements)

- return result
+ return results if isinstance(question, (list, tuple)) else results[0]

  finally:
  # Clean up temporary file
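
Both `ask_pdf_page` and `ask_pdf_region` now loop over the (possibly batched) results and map each answer's `start`/`end` span back to source elements by matching text against the word boxes. A standalone sketch of that matching step, with a tiny stand-in element class for illustration:

```python
from dataclasses import dataclass
from typing import Any, List, Sequence

@dataclass
class FakeElement:
    # Stand-in for a natural-pdf text element; only the `text` attribute matters here.
    text: str

def find_source_elements(elements: Sequence[Any], word_boxes: List[list],
                         start_idx: int, end_idx: int) -> List[Any]:
    # Collect elements whose text matches a word box covered by the answer span.
    matched_texts = [wb[0] for wb in word_boxes[start_idx : end_idx + 1]]
    source_elements = []
    for element in elements:
        if hasattr(element, "text") and element.text in matched_texts:
            source_elements.append(element)
            matched_texts.remove(element.text)  # avoid matching the same text twice
    return source_elements

boxes = [["Total:", [10, 10, 50, 20]], ["$42.00", [55, 10, 95, 20]]]
elems = [FakeElement("Total:"), FakeElement("$42.00"), FakeElement("Date:")]
print(find_source_elements(elems, boxes, 0, 1))  # the "Total:" and "$42.00" elements
```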
@@ -63,9 +63,9 @@ def _get_layout_kwargs(
  else:
  logger.warning(f"Ignoring unsupported layout keyword argument: '{key}'")

- # 4. Ensure layout flag is present, defaulting to True
+ # 4. Ensure layout flag is present, defaulting to False (caller can override)
  if "layout" not in layout_kwargs:
- layout_kwargs["layout"] = True
+ layout_kwargs["layout"] = False

  return layout_kwargs

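This hunk flips the fallback for the internal `layout` flag: when the caller does not pass `layout`, it now defaults to False instead of True. A minimal sketch of the new fallback behaviour (the helper below is illustrative, not the library's API):

```python
def resolve_layout_flag(layout_kwargs: dict) -> dict:
    # The "layout" key now falls back to False unless the caller sets it explicitly.
    if "layout" not in layout_kwargs:
        layout_kwargs["layout"] = False  # was True in 0.1.23
    return layout_kwargs

assert resolve_layout_flag({})["layout"] is False
assert resolve_layout_flag({"layout": True})["layout"] is True
```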
@@ -203,24 +203,42 @@ def generate_text_layout(
  logger.debug("generate_text_layout: No valid character dicts found after filtering.")
  return ""

- # Prepare layout arguments
- layout_kwargs = _get_layout_kwargs(layout_context_bbox, user_kwargs)
- use_layout = layout_kwargs.pop("layout", True) # Extract layout flag, default True
+ # Make a working copy of user_kwargs so we can safely pop custom keys
+ incoming_kwargs = user_kwargs.copy() if user_kwargs else {}

- if not use_layout:
- # Simple join if layout=False
- logger.debug("generate_text_layout: Using simple join (layout=False requested).")
- # Sort before joining if layout is off
- valid_char_dicts.sort(key=lambda c: (c.get("top", 0), c.get("x0", 0)))
- result = "".join(c.get("text", "") for c in valid_char_dicts) # Use valid chars
- return result
+ # --- Handle custom 'strip' option ------------------------------------
+ # * strip=True – post-process the final string to remove leading/trailing
+ # whitespace (typically used when layout=False)
+ # * strip=False preserve whitespace exactly as produced.
+ # Default behaviour depends on the layout flag (see below).
+ explicit_strip_flag = incoming_kwargs.pop("strip", None) # May be None
+
+ # Prepare layout arguments now that we've removed the non-pdfplumber key
+ layout_kwargs = _get_layout_kwargs(layout_context_bbox, incoming_kwargs)
+ use_layout = layout_kwargs.get("layout", False)
+
+ # Determine final strip behaviour: if caller specified override, honour it;
+ # otherwise default to !use_layout (True when layout=False, False when
+ # layout=True) per user request.
+ strip_result = explicit_strip_flag if explicit_strip_flag is not None else (not use_layout)

  try:
- # Sort chars primarily by top, then x0 before layout analysis
- # This helps pdfplumber group lines correctly
+ # Sort chars primarily by top, then x0 before layout analysis – required by
+ # pdfplumber so that grouping into lines works deterministically.
  valid_char_dicts.sort(key=lambda c: (c.get("top", 0), c.get("x0", 0)))
+
+ # Build the text map. `layout_kwargs` still contains the caller-specified or
+ # default "layout" flag, which chars_to_textmap will respect.
  textmap = chars_to_textmap(valid_char_dicts, **layout_kwargs)
  result = textmap.as_string
+
+ # ----------------------------------------------------------------
+ # Optional post-processing strip
+ # ----------------------------------------------------------------
+ if strip_result and isinstance(result, str):
+ # Remove trailing spaces on each line then trim leading/trailing
+ # blank lines for a cleaner output while keeping internal newlines.
+ result = "\n".join(line.rstrip() for line in result.splitlines()).strip()
  except Exception as e:
  # Fallback to simple join on error
  logger.error(f"generate_text_layout: Error calling chars_to_textmap: {e}", exc_info=False)
@@ -230,5 +248,7 @@ def generate_text_layout(
  # Fallback already has sorted characters if layout was attempted
  # Need to use the valid_char_dicts here too
  result = "".join(c.get("text", "") for c in valid_char_dicts)
+ if strip_result:
+ result = result.strip()

  return result
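
Taken together, the last two hunks add an opt-in `strip` post-processing step: an explicit `strip` keyword wins, otherwise stripping is enabled exactly when `layout` is off. When active, trailing spaces are removed per line and the result is trimmed of leading/trailing whitespace while internal newlines are preserved. A self-contained sketch of just that logic:

```python
from typing import Optional

def apply_strip(result: str, strip: Optional[bool], use_layout: bool) -> str:
    # Explicit flag wins; otherwise strip iff layout is off.
    strip_result = strip if strip is not None else (not use_layout)
    if strip_result:
        result = "\n".join(line.rstrip() for line in result.splitlines()).strip()
    return result

print(repr(apply_strip("  Total:   42  \n\n", strip=None, use_layout=False)))  # 'Total:   42'
print(repr(apply_strip("  Total:   42  \n\n", strip=None, use_layout=True)))   # unchanged
```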
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: natural-pdf
- Version: 0.1.23
+ Version: 0.1.24
  Summary: A more intuitive interface for working with PDFs
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
  License-Expression: MIT
@@ -11,6 +11,7 @@ Classifier: Operating System :: OS Independent
  Requires-Python: >=3.9
  Description-Content-Type: text/markdown
  License-File: LICENSE
+ Requires-Dist: markdown
  Requires-Dist: pandas
  Requires-Dist: pdfplumber
  Requires-Dist: colormath2
@@ -1,7 +1,7 @@
  natural_pdf/__init__.py,sha256=qDFJNF8sbEDO-2WSFAxoWEM8updOUP6dB-ckya0kxfs,3275
  natural_pdf/cli.py,sha256=IXrP2lCHihr-ed-CFiDbMTnSsutQa1j1PYALOLGbpsc,4019
  natural_pdf/analyzers/__init__.py,sha256=dIXjsMqoxKmd9OOnSBzn12wvdIz7D7YNQRAnXslpJSM,142
- natural_pdf/analyzers/shape_detection_mixin.py,sha256=blpeHMWl6nXlutAByfdi6zjfmcyaDpdv2S7IR4l0WO0,81783
+ natural_pdf/analyzers/shape_detection_mixin.py,sha256=aHn4EMdbwOe8VWECPceGs5wN7gJP_kIxyAbmbNlNPSs,83634
  natural_pdf/analyzers/text_options.py,sha256=qEkDaYWla0rIM_gszEOsu52q7C_dAfV81P2HLJZM2sw,3333
  natural_pdf/analyzers/text_structure.py,sha256=VfKTsTFrK877sC0grsis9jK3rrgp0Mbp13VWEbukTcs,28437
  natural_pdf/analyzers/utils.py,sha256=PYbzJzSAHZ7JsMes84WIrSbA0zkjJGs0CLvIeINsf_k,2100
@@ -25,21 +25,21 @@ natural_pdf/collections/mixins.py,sha256=sj76Cn6EdBtb5f-bdAV-1qpdixX8tI4BzPccPiY
  natural_pdf/collections/pdf_collection.py,sha256=HLlyakM--23ZOeHDPucoM6Tw3yUyMXm0SSoqJwxRc2E,30744
  natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
  natural_pdf/core/element_manager.py,sha256=_UdXu51sLi6STzc8Pj4k8R721G3yJixXDLuRHn3hmr8,25731
- natural_pdf/core/highlighting_service.py,sha256=wWoU2kJ_JBbxKV3NWEjqU6DLvmlwME9sTntk-TDqOfs,38223
- natural_pdf/core/page.py,sha256=U4GRy_zdoTB4sx4EPrAIKg4beIQ8atJsY5HX_jWfDjg,118953
+ natural_pdf/core/highlighting_service.py,sha256=DKoaxiiuQsWgtf6wSroMAIcFiqJOOF7dXhciYdQKdCw,38223
+ natural_pdf/core/page.py,sha256=TOtpUp5lRhDj32wv3yvRaS8kxPX6R9904OCC6uHFi84,119512
  natural_pdf/core/pdf.py,sha256=qsSW4RxOJRmCnweLPMs0NhzkRfiAVdghTgnh4D_wuO4,74295
  natural_pdf/describe/__init__.py,sha256=B3zjuHjFI_dFuBLgXR1Q4v7c72fVDyk84d2hs0H4KV8,561
- natural_pdf/describe/base.py,sha256=LAZLc_thK2u2surgGd0Pk7CN2uVaZK9AbMOE3-1RmQ4,16842
+ natural_pdf/describe/base.py,sha256=mUvEydumXXPJ2FkWAYm1BbWrRWY81I0dMyQrEU32rmc,17256
  natural_pdf/describe/elements.py,sha256=xD8wwR1z5IKat7RIwoAwQRUEL6zJTEwcOKorF4F-xPg,12717
  natural_pdf/describe/mixin.py,sha256=U0x6v8r57KQb8qC3VVo64hvhfXQWsti8vdKBM7AXnMo,3116
- natural_pdf/describe/summary.py,sha256=dPtjrn6fQ8nL0F74RITX2vXlDX7ZgaX9JQPnJB-S_XQ,6735
+ natural_pdf/describe/summary.py,sha256=h5zy9zG7t27wFnJ2hEguGSoURtN2IR4x6WBO3aXB4eo,7980
  natural_pdf/elements/__init__.py,sha256=S8XeiNWJ1WcgnyYKdYV1yxQlAxCCO3FfITT8MQwNbyk,41
- natural_pdf/elements/base.py,sha256=iTIy6FfQj48llZkm7wERnTky3VTmUgkYfQytRuyueZo,43304
- natural_pdf/elements/collections.py,sha256=zaqJ8pr0dmYwv1gPBs24oXfZExpSIX4URDRox-QLj98,123173
+ natural_pdf/elements/base.py,sha256=iw-Ab0o7eI69npt0gAxQvA14GPWHAAhkLrJ_JeKvIos,43309
+ natural_pdf/elements/collections.py,sha256=JrM42VPRtDOJ9Q9KIR3SrcbamiiCHXI4nzTq2BBkeEk,124223
  natural_pdf/elements/line.py,sha256=300kSFBDUBIudfeQtH_tzW9gTYRgRKUDPiTABw6J-BE,4782
  natural_pdf/elements/rect.py,sha256=kiVa3e377ZnqIOXc89d9ZSY4EcmDxtccdtUw-HOQzpw,3796
- natural_pdf/elements/region.py,sha256=BAOriJuQYovppV0S5xI6tq5YEuzffiMQneDvHuT22Uo,118562
- natural_pdf/elements/text.py,sha256=13HvVZGinj2Vm_fFCAnqi7hohtoKvnpCp3VCfkpeAbc,11146
+ natural_pdf/elements/region.py,sha256=CVncbiCk8ivn04CI7Ob93O7UY0ANVpCJwikBt-jVWgg,123698
+ natural_pdf/elements/text.py,sha256=yshGrvdiBZSkYhQfdi6Yz6NN0kWvmqKHSSC82D829os,11470
  natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
  natural_pdf/exporters/__init__.py,sha256=g1WRPCDVzceaUUsm8dchPhzdHFSjYM0NfRyc8iN0mtE,644
  natural_pdf/exporters/base.py,sha256=XhR1xlkHOh7suOuX7mWbsj1h2o1pZNet-OAS5YCJyeI,2115
@@ -58,7 +58,7 @@ natural_pdf/flows/__init__.py,sha256=cUN4A8hTDLZSRr4PO2W_lR4z6hWpbNG8Seox-IIcrLU
  natural_pdf/flows/collections.py,sha256=qGuSPFSPQF-wiYquG6STiSzg_o951MSsFEq_B44Jef8,28441
  natural_pdf/flows/element.py,sha256=mKzk3B7A7sWNvu4CDvAjLr3_ZFLt--ktrSNoLfLpFxU,23940
  natural_pdf/flows/flow.py,sha256=I61BpFVDQyo6ORsmoqoYiOEP1DBRp0vgDJjm_V8frhc,10562
- natural_pdf/flows/region.py,sha256=hucKKmjjmLt__x-RiX6S1Amsp88yweyjcgWJ7PQtTgY,22187
+ natural_pdf/flows/region.py,sha256=4U3S7pLEa3oCyPfS-hpD0lSXf8MWT-MdF9AsVvMJbWU,26670
  natural_pdf/ocr/__init__.py,sha256=VY8hhvDPf7Gh2lB-d2QRmghLLyTy6ydxlgo1cS4dOSk,2482
  natural_pdf/ocr/engine.py,sha256=ZBC1tZNM5EDbGDJJmZI9mNHr4nCMLEZvUFhiJq8GdF4,8741
  natural_pdf/ocr/engine_doctr.py,sha256=ptKrupMWoulZb-R93zr9btoe94JPWU7vlJuN7OBJEIM,17740
@@ -70,7 +70,7 @@ natural_pdf/ocr/ocr_manager.py,sha256=K2gpFo3e6RB1ouXOstlEAAYd14DbjBNt5RH6J7ZdDQ
  natural_pdf/ocr/ocr_options.py,sha256=l33QKu_93r-uwi3t_v8UH8pEgHo6HTVzP4tfmQFRF1w,5488
  natural_pdf/ocr/utils.py,sha256=OxuHwDbHWj6setvnC0QYwMHrAjxGkhmLzWHpMqqGupA,4397
  natural_pdf/qa/__init__.py,sha256=2u2KJcA71g1I0HnLD-j6yvDw1moAjo9kkLhhfoYRURM,166
- natural_pdf/qa/document_qa.py,sha256=bwOrO_bq_9wEaLu7j7h8EkN3ya5xMxDoE7oNurEb6-E,14889
+ natural_pdf/qa/document_qa.py,sha256=6-XuIEFf5BcVA_e85FBmAeXpNZgzZhTBDkNUMPAl-tc,17803
  natural_pdf/qa/qa_result.py,sha256=_q4dlSqsjtgomcI8-pqbOT69lqQKnEMkhZNydoxEkkE,2227
  natural_pdf/search/__init__.py,sha256=0Xa7tT_2q57wHObFMQLQLd4gd9AV0oyS-svV6BmmdMI,4276
  natural_pdf/search/lancedb_search_service.py,sha256=6dz2IEZUWk3hFW28C-LF_85pWohd7Sr5k44bM0pBdm4,14472
@@ -88,13 +88,13 @@ natural_pdf/utils/identifiers.py,sha256=P7n6owcubnF8oAMa_UfYtENmIaJQdH_AMC9Jbs2b
  natural_pdf/utils/locks.py,sha256=7HJqV0VsNcOfISnbw8goCKWP5ck11uSJo6T_x9XIPKI,215
  natural_pdf/utils/packaging.py,sha256=Jshxp6S1zfcqoZmFhdd7WOpL--b6rBSz-Y9mYqELXIY,21581
  natural_pdf/utils/reading_order.py,sha256=s3DsYq_3g_1YA07qhd4BGEjeIRTeyGtnwc_hNtSzwBY,7290
- natural_pdf/utils/text_extraction.py,sha256=z6Jhy11pakYCsEpkvh8ldw6DkUFsYF1hCL9YDmfXWL4,9605
+ natural_pdf/utils/text_extraction.py,sha256=mDeN1_VevNi3RwvFe48PM5vBh-A5WeBlYgP6lSjBaOk,10854
  natural_pdf/utils/visualization.py,sha256=30pRWQdsRJh2pSObh-brKVsFgC1n8tHmSrta_UDnVPw,8989
  natural_pdf/widgets/__init__.py,sha256=QTVaUmsw__FCweFYZebwPssQxxUFUMd0wpm_cUbGZJY,181
  natural_pdf/widgets/viewer.py,sha256=2VUY1TzWMDe9I-IVNOosKZ2LaqpjLB62ftMAdk-s6_8,24952
- natural_pdf-0.1.23.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
- natural_pdf-0.1.23.dist-info/METADATA,sha256=z7Mq5yr_sckn7pFR1KqBz_fG2sG-jBBSb2czsRrzC_k,6660
- natural_pdf-0.1.23.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- natural_pdf-0.1.23.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
- natural_pdf-0.1.23.dist-info/top_level.txt,sha256=Cyw1zmNDlUZfb5moU-WUWGprrwH7ln_8LDGdmMHF1xI,17
- natural_pdf-0.1.23.dist-info/RECORD,,
+ natural_pdf-0.1.24.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
+ natural_pdf-0.1.24.dist-info/METADATA,sha256=qcyQUXKXciLsomzdsdkQ4inSw_MJbczyj8oPq4KVGZQ,6684
+ natural_pdf-0.1.24.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ natural_pdf-0.1.24.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
+ natural_pdf-0.1.24.dist-info/top_level.txt,sha256=Cyw1zmNDlUZfb5moU-WUWGprrwH7ln_8LDGdmMHF1xI,17
+ natural_pdf-0.1.24.dist-info/RECORD,,