preocr 1.2.0__tar.gz → 1.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {preocr-1.2.0 → preocr-1.2.2}/PKG-INFO +1 -1
  2. {preocr-1.2.0 → preocr-1.2.2}/preocr/analysis/opencv_layout.py +2 -2
  3. {preocr-1.2.0 → preocr-1.2.2}/preocr/analysis/page_detection.py +2 -2
  4. {preocr-1.2.0 → preocr-1.2.2}/preocr/core/decision.py +44 -37
  5. {preocr-1.2.0 → preocr-1.2.2}/preocr/core/detector.py +7 -7
  6. {preocr-1.2.0 → preocr-1.2.2}/preocr/core/extractor.py +3 -3
  7. {preocr-1.2.0 → preocr-1.2.2}/preocr/extraction/base.py +1 -1
  8. {preocr-1.2.0 → preocr-1.2.2}/preocr/extraction/formatters.py +10 -10
  9. {preocr-1.2.0 → preocr-1.2.2}/preocr/extraction/pdf_extractor.py +390 -343
  10. {preocr-1.2.0 → preocr-1.2.2}/preocr/probes/pdf_probe.py +3 -3
  11. {preocr-1.2.0 → preocr-1.2.2}/preocr/utils/logger.py +11 -9
  12. {preocr-1.2.0 → preocr-1.2.2}/preocr/version.py +1 -1
  13. {preocr-1.2.0 → preocr-1.2.2}/preocr.egg-info/PKG-INFO +1 -1
  14. {preocr-1.2.0 → preocr-1.2.2}/preocr.egg-info/SOURCES.txt +2 -0
  15. preocr-1.2.2/tests/test_config_thresholds.py +94 -0
  16. {preocr-1.2.0 → preocr-1.2.2}/tests/test_hybrid_pipeline.py +1 -1
  17. preocr-1.2.2/tests/test_layout_aware_needs_ocr.py +109 -0
  18. {preocr-1.2.0 → preocr-1.2.2}/tests/test_opencv_layout.py +8 -2
  19. {preocr-1.2.0 → preocr-1.2.2}/tests/test_pdf_probe.py +0 -1
  20. {preocr-1.2.0 → preocr-1.2.2}/LICENSE +0 -0
  21. {preocr-1.2.0 → preocr-1.2.2}/README.md +0 -0
  22. {preocr-1.2.0 → preocr-1.2.2}/preocr/__init__.py +0 -0
  23. {preocr-1.2.0 → preocr-1.2.2}/preocr/analysis/__init__.py +0 -0
  24. {preocr-1.2.0 → preocr-1.2.2}/preocr/analysis/layout_analyzer.py +0 -0
  25. {preocr-1.2.0 → preocr-1.2.2}/preocr/constants.py +0 -0
  26. {preocr-1.2.0 → preocr-1.2.2}/preocr/core/__init__.py +0 -0
  27. {preocr-1.2.0 → preocr-1.2.2}/preocr/core/signals.py +0 -0
  28. {preocr-1.2.0 → preocr-1.2.2}/preocr/exceptions.py +0 -0
  29. {preocr-1.2.0 → preocr-1.2.2}/preocr/extraction/__init__.py +0 -0
  30. {preocr-1.2.0 → preocr-1.2.2}/preocr/extraction/office_extractor.py +0 -0
  31. {preocr-1.2.0 → preocr-1.2.2}/preocr/extraction/schemas.py +0 -0
  32. {preocr-1.2.0 → preocr-1.2.2}/preocr/extraction/text_extractor.py +0 -0
  33. {preocr-1.2.0 → preocr-1.2.2}/preocr/probes/__init__.py +0 -0
  34. {preocr-1.2.0 → preocr-1.2.2}/preocr/probes/image_probe.py +0 -0
  35. {preocr-1.2.0 → preocr-1.2.2}/preocr/probes/office_probe.py +0 -0
  36. {preocr-1.2.0 → preocr-1.2.2}/preocr/probes/text_probe.py +0 -0
  37. {preocr-1.2.0 → preocr-1.2.2}/preocr/py.typed +0 -0
  38. {preocr-1.2.0 → preocr-1.2.2}/preocr/reason_codes.py +0 -0
  39. {preocr-1.2.0 → preocr-1.2.2}/preocr/utils/__init__.py +0 -0
  40. {preocr-1.2.0 → preocr-1.2.2}/preocr/utils/batch.py +0 -0
  41. {preocr-1.2.0 → preocr-1.2.2}/preocr/utils/cache.py +0 -0
  42. {preocr-1.2.0 → preocr-1.2.2}/preocr/utils/filetype.py +0 -0
  43. {preocr-1.2.0 → preocr-1.2.2}/preocr.egg-info/dependency_links.txt +0 -0
  44. {preocr-1.2.0 → preocr-1.2.2}/preocr.egg-info/requires.txt +0 -0
  45. {preocr-1.2.0 → preocr-1.2.2}/preocr.egg-info/top_level.txt +0 -0
  46. {preocr-1.2.0 → preocr-1.2.2}/pyproject.toml +0 -0
  47. {preocr-1.2.0 → preocr-1.2.2}/setup.cfg +0 -0
  48. {preocr-1.2.0 → preocr-1.2.2}/tests/test_batch.py +0 -0
  49. {preocr-1.2.0 → preocr-1.2.2}/tests/test_decision.py +0 -0
  50. {preocr-1.2.0 → preocr-1.2.2}/tests/test_detector.py +0 -0
  51. {preocr-1.2.0 → preocr-1.2.2}/tests/test_filetype.py +0 -0
  52. {preocr-1.2.0 → preocr-1.2.2}/tests/test_image_probe.py +0 -0
  53. {preocr-1.2.0 → preocr-1.2.2}/tests/test_integration.py +0 -0
  54. {preocr-1.2.0 → preocr-1.2.2}/tests/test_layout_analyzer.py +0 -0
  55. {preocr-1.2.0 → preocr-1.2.2}/tests/test_office_probe.py +0 -0
  56. {preocr-1.2.0 → preocr-1.2.2}/tests/test_page_detection.py +0 -0
  57. {preocr-1.2.0 → preocr-1.2.2}/tests/test_reason_codes.py +0 -0
  58. {preocr-1.2.0 → preocr-1.2.2}/tests/test_signals.py +0 -0
  59. {preocr-1.2.0 → preocr-1.2.2}/tests/test_text_probe.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: preocr
3
- Version: 1.2.0
3
+ Version: 1.2.2
4
4
  Summary: A fast, CPU-only library that intelligently detects whether files need OCR processing before expensive OCR operations. Uses hybrid adaptive pipeline for 92-95% accuracy.
5
5
  Author: PreOCR Contributors
6
6
  License-Expression: Apache-2.0
@@ -349,8 +349,8 @@ def _contours_overlap(contour1, contour2, overlap_threshold: float = 0.3) -> boo
349
349
 
350
350
  try:
351
351
  # Get bounding boxes
352
- x1, y1, w1, h1 = _cv2.boundingRect(contour1)
353
- x2, y2, w2, h2 = _cv2.boundingRect(contour2)
352
+ x1, y1, w1, h1 = cv2.boundingRect(contour1)
353
+ x2, y2, w2, h2 = cv2.boundingRect(contour2)
354
354
 
355
355
  # Calculate intersection
356
356
  x_overlap = max(0, min(x1 + w1, x2 + w2) - max(x1, x2))
@@ -42,11 +42,11 @@ def analyze_pdf_pages(
42
42
  2. Adjusted based on consistency:
43
43
  - If all pages are consistent (all need OCR or all don't), confidence +0.1
44
44
  - If pages are mixed (some need OCR, some don't), confidence -0.1
45
-
45
+
46
46
  This means:
47
47
  - Uniform documents (all scanned or all digital) get higher confidence
48
48
  - Mixed documents get lower confidence, reflecting the uncertainty
49
-
49
+
50
50
  Per-page confidence:
51
51
  - Pages with text: 0.95 (high confidence)
52
52
  - Pages without text: 0.80 if completely empty, 0.60 if sparse text
@@ -22,44 +22,44 @@ def calculate_ocr_score(
22
22
  ) -> float:
23
23
  """
24
24
  Calculate OCR_SCORE using pixel-aware scoring model.
25
-
26
- OCR_SCORE = 0.35 * image_ratio + 0.25 * (1 - alphabet_ratio) +
25
+
26
+ OCR_SCORE = 0.35 * image_ratio + 0.25 * (1 - alphabet_ratio) +
27
27
  0.2 * low_text_density + 0.2 * font_suspicion
28
-
28
+
29
29
  Args:
30
30
  text_length: Length of extracted text
31
31
  image_coverage: Image coverage percentage (0-100)
32
32
  text_coverage: Text coverage percentage (0-100)
33
33
  config: Optional Config object
34
-
34
+
35
35
  Returns:
36
36
  OCR_SCORE (0.0-1.0) where higher score indicates more likely to need OCR
37
37
  """
38
38
  if config is None:
39
39
  config = _DEFAULT_CONFIG
40
-
40
+
41
41
  # Calculate image_ratio from image_coverage (convert percentage to ratio)
42
42
  image_ratio = image_coverage / 100.0 if image_coverage > 0 else 0.0
43
-
43
+
44
44
  # Approximate alphabet_ratio (normalized text length factor)
45
45
  max_expected_text = 10000 # Reasonable max for a page
46
46
  alphabet_ratio = min(text_length / max_expected_text, 1.0) if text_length > 0 else 0.0
47
-
47
+
48
48
  # Calculate low_text_density (inverse of text_coverage, normalized)
49
49
  text_density = text_coverage / 100.0 if text_coverage > 0 else 0.0
50
50
  low_text_density = 1.0 - min(text_density, 1.0)
51
-
51
+
52
52
  # Font suspicion: higher when text_length is very low
53
53
  font_suspicion = 1.0 - min(text_length / 50.0, 1.0) if text_length < 50 else 0.0
54
-
54
+
55
55
  # Calculate OCR score
56
56
  ocr_score = (
57
- 0.35 * image_ratio +
58
- 0.25 * (1.0 - alphabet_ratio) +
59
- 0.20 * low_text_density +
60
- 0.20 * font_suspicion
57
+ 0.35 * image_ratio
58
+ + 0.25 * (1.0 - alphabet_ratio)
59
+ + 0.20 * low_text_density
60
+ + 0.20 * font_suspicion
61
61
  )
62
-
62
+
63
63
  return round(ocr_score, 3)
64
64
 
65
65
 
@@ -71,24 +71,24 @@ def calculate_confidence_from_signals(
71
71
  ) -> float:
72
72
  """
73
73
  Calculate confidence score from signals using unified approach.
74
-
74
+
75
75
  Priority:
76
76
  1. Use OCR_SCORE if available (most accurate)
77
77
  2. Use layout-based calculation
78
78
  3. Fallback to text-length based
79
-
79
+
80
80
  Args:
81
81
  signals: Dictionary of signals from signals.collect_signals()
82
82
  needs_ocr: Boolean indicating if OCR is needed
83
83
  ocr_score: Optional OCR_SCORE (0.0-1.0) if already calculated
84
84
  config: Optional Config object
85
-
85
+
86
86
  Returns:
87
87
  Confidence score (0.0-1.0)
88
88
  """
89
89
  if config is None:
90
90
  config = _DEFAULT_CONFIG
91
-
91
+
92
92
  # Priority 1: Use OCR_SCORE if available (most accurate)
93
93
  if ocr_score is not None and config.use_ocr_score_confidence:
94
94
  # Calibrate OCR_SCORE to confidence range (0.50-0.95)
@@ -99,13 +99,13 @@ def calculate_confidence_from_signals(
99
99
  # Lower OCR_SCORE = higher confidence for "no OCR"
100
100
  confidence = 0.50 + ((1.0 - ocr_score) * 0.45) # Range: 0.50-0.95
101
101
  return round(confidence, 2)
102
-
102
+
103
103
  # Priority 2: Layout-based (if layout data available)
104
104
  layout_type = signals.get("layout_type")
105
105
  if layout_type and layout_type != "unknown":
106
106
  text_coverage = float(signals.get("text_coverage", 0.0))
107
107
  image_coverage = float(signals.get("image_coverage", 0.0))
108
-
108
+
109
109
  if needs_ocr:
110
110
  # More images = higher confidence
111
111
  image_factor = min(image_coverage / 100.0, 1.0)
@@ -115,7 +115,7 @@ def calculate_confidence_from_signals(
115
115
  text_factor = min(text_coverage / 100.0, 1.0)
116
116
  confidence = 0.70 + (text_factor * 0.25) # Range: 0.70-0.95
117
117
  return round(confidence, 2)
118
-
118
+
119
119
  # Priority 3: Text-length based fallback
120
120
  text_length = signals.get("text_length", 0)
121
121
  if needs_ocr:
@@ -129,7 +129,7 @@ def calculate_confidence_from_signals(
129
129
  # More text = higher confidence (digital)
130
130
  text_factor = min(text_length / 1000.0, 1.0)
131
131
  confidence = 0.75 + (text_factor * 0.20) # Range: 0.75-0.95
132
-
132
+
133
133
  return round(confidence, 2)
134
134
 
135
135
 
@@ -205,21 +205,25 @@ def decide(
205
205
  is_mixed_content = signals.get("is_mixed_content", False)
206
206
  text_coverage = signals.get("text_coverage", 0.0)
207
207
  image_coverage = signals.get("image_coverage", 0.0)
208
-
208
+
209
209
  # Calculate image_ratio from image_coverage (convert percentage to ratio)
210
210
  # Also check OpenCV results if available (more accurate for scanned PDFs)
211
211
  opencv_layout = signals.get("opencv_layout", {})
212
212
  image_coverage_opencv = opencv_layout.get("image_coverage", 0.0) if opencv_layout else 0.0
213
-
213
+
214
214
  # Use OpenCV image_coverage if available (more accurate), otherwise use layout image_coverage
215
- effective_image_coverage = image_coverage_opencv if image_coverage_opencv > 0 else image_coverage
215
+ effective_image_coverage = (
216
+ image_coverage_opencv if image_coverage_opencv > 0 else image_coverage
217
+ )
216
218
  image_ratio = effective_image_coverage / 100.0 if effective_image_coverage > 0 else 0.0
217
-
219
+
218
220
  # Calculate OCR_SCORE for unified confidence calculation
219
221
  ocr_score = None
220
222
  if layout_type and layout_type != "unknown":
221
- ocr_score = calculate_ocr_score(text_length, effective_image_coverage, text_coverage, config)
222
-
223
+ ocr_score = calculate_ocr_score(
224
+ text_length, effective_image_coverage, text_coverage, config
225
+ )
226
+
223
227
  # 🔥 Hybrid Rule: Sweet spot for OCR detection
224
228
  # If image_ratio > 0.75 AND extracted_text_length < 30 → OCR
225
229
  # This catches scanned PDFs that are image-heavy with minimal extractable text
@@ -237,7 +241,7 @@ def decide(
237
241
  CATEGORY_UNSTRUCTURED,
238
242
  ReasonCode.PDF_SCANNED,
239
243
  )
240
-
244
+
241
245
  # Alternative: If text_length is very low (< 30) and we have layout data suggesting images
242
246
  # This handles cases where scanned PDFs aren't detected as images but have no text
243
247
  if text_length < 30 and layout_type and layout_type != "unknown":
@@ -276,7 +280,7 @@ def decide(
276
280
  CATEGORY_UNSTRUCTURED,
277
281
  ReasonCode.PDF_MIXED,
278
282
  )
279
-
283
+
280
284
  # If text coverage is significant, might not need full OCR
281
285
  if text_length >= config.min_text_length and text_coverage > 10:
282
286
  confidence = calculate_confidence_from_signals(
@@ -359,7 +363,7 @@ def decide(
359
363
  CATEGORY_UNSTRUCTURED,
360
364
  ReasonCode.PDF_SCANNED,
361
365
  )
362
-
366
+
363
367
  # Fallback to text-length based decision (when layout analysis not available)
364
368
  if text_length >= config.min_text_length:
365
369
  # Use unified confidence calculation (fallback mode)
@@ -472,21 +476,21 @@ def refine_with_opencv(
472
476
  image_coverage_opencv = opencv_result.get("image_coverage", 0.0)
473
477
  has_text_regions = opencv_result.get("has_text_regions", False)
474
478
  layout_type = opencv_result.get("layout_type", "unknown")
475
-
479
+
476
480
  # Calculate OCR_SCORE from OpenCV results for unified confidence
477
481
  ocr_score_opencv = calculate_ocr_score(
478
482
  text_length, image_coverage_opencv, text_coverage_opencv, config
479
483
  )
480
-
484
+
481
485
  # Update signals with OpenCV layout data for confidence calculation
482
486
  signals_with_opencv = signals.copy()
483
487
  signals_with_opencv["layout_type"] = layout_type
484
488
  signals_with_opencv["text_coverage"] = text_coverage_opencv
485
489
  signals_with_opencv["image_coverage"] = image_coverage_opencv
486
-
490
+
487
491
  # Calculate image_ratio from image_coverage (convert percentage to ratio)
488
492
  image_ratio = image_coverage_opencv / 100.0 if image_coverage_opencv > 0 else 0.0
489
-
493
+
490
494
  # 🔥 Hybrid Rule: Sweet spot for OCR detection (applied in OpenCV refinement too)
491
495
  # If image_ratio > 0.75 AND extracted_text_length < 30 → OCR
492
496
  if image_ratio > 0.75 and text_length < 30:
@@ -526,7 +530,7 @@ def refine_with_opencv(
526
530
  CATEGORY_UNSTRUCTURED,
527
531
  ReasonCode.PDF_MIXED,
528
532
  )
529
-
533
+
530
534
  if text_length >= config.min_text_length and text_coverage_opencv > 15:
531
535
  # Digital text document - use unified confidence calculation
532
536
  confidence = calculate_confidence_from_signals(
@@ -607,7 +611,10 @@ def refine_with_opencv(
607
611
  if (initial_needs_ocr and not has_text_regions) or (not initial_needs_ocr and has_text_regions):
608
612
  # Calculate OCR_SCORE-based confidence
609
613
  ocr_confidence = calculate_confidence_from_signals(
610
- signals_with_opencv, needs_ocr=initial_needs_ocr, ocr_score=ocr_score_opencv, config=config
614
+ signals_with_opencv,
615
+ needs_ocr=initial_needs_ocr,
616
+ ocr_score=ocr_score_opencv,
617
+ config=config,
611
618
  )
612
619
  # Weighted combination: 30% initial, 70% OCR_SCORE-based (OpenCV is more accurate)
613
620
  confidence = (initial_confidence * 0.3) + (ocr_confidence * 0.7)
@@ -66,20 +66,20 @@ def needs_ocr(
66
66
 
67
67
  Note on Confidence Scores:
68
68
  Confidence scores may vary between page_level=True and page_level=False modes:
69
-
69
+
70
70
  - **Without page_level**: Confidence is calculated based on document-level heuristics
71
71
  and OpenCV analysis (if triggered). Typical range: 0.60-0.95.
72
-
72
+
73
73
  - **With page_level=True**: Confidence is calculated as the average of per-page
74
74
  confidence scores, adjusted for consistency. For mixed documents (some pages
75
75
  need OCR, some don't), confidence may be lower due to the averaging effect.
76
76
  Typical range: 0.60-0.95, but may be lower for mixed documents.
77
-
77
+
78
78
  - **Why the difference**: Page-level analysis provides more granular information
79
79
  but averages confidence across pages. Document-level analysis uses overall
80
80
  text extraction and layout analysis, which can be more confident for uniform
81
81
  documents.
82
-
82
+
83
83
  Both modes are accurate; the difference reflects the analysis granularity.
84
84
  Use page_level=True when you need per-page decisions, otherwise use the
85
85
  default (page_level=False) for faster, document-level decisions.
@@ -187,7 +187,7 @@ def needs_ocr(
187
187
  if opencv_result:
188
188
  # Add OpenCV results to signals BEFORE refining (so hybrid rule can use it)
189
189
  collected_signals["opencv_layout"] = opencv_result
190
-
190
+
191
191
  # Refine decision based on OpenCV analysis
192
192
  needs_ocr_flag, reason, confidence, category, reason_code = decision.refine_with_opencv(
193
193
  collected_signals,
@@ -218,14 +218,14 @@ def needs_ocr(
218
218
  if page_analysis and "pages" in page_analysis:
219
219
  page_count = page_analysis.get("page_count", 0)
220
220
  pages_list = page_analysis.get("pages", [])
221
-
221
+
222
222
  # Only add page-level data if it's valid
223
223
  if page_count > 0 and len(pages_list) > 0:
224
224
  result["pages"] = pages_list
225
225
  result["page_count"] = page_count
226
226
  result["pages_needing_ocr"] = page_analysis.get("pages_needing_ocr", 0)
227
227
  result["pages_with_text"] = page_analysis.get("pages_with_text", 0)
228
-
228
+
229
229
  # Override overall decision with page-level analysis only if data is valid
230
230
  if page_analysis.get("overall_needs_ocr") is not None:
231
231
  # Validate that page-level analysis is complete and consistent
@@ -134,8 +134,8 @@ def extract_native_data(
134
134
 
135
135
  # Format output
136
136
  return format_result(
137
- result,
138
- output_format=output_format,
137
+ result,
138
+ output_format=output_format,
139
139
  markdown_clean=markdown_clean,
140
- include_metadata=include_metadata
140
+ include_metadata=include_metadata,
141
141
  )
@@ -75,7 +75,7 @@ def calculate_confidence(
75
75
  text_quality: Quality of text (0.0-1.0), based on font size and clarity
76
76
  extraction_method: Method used ("pdfplumber" = 0.9, "pymupdf" = 0.8)
77
77
  element_type_certainty: How certain we are about classification (0.0-1.0)
78
- bbox_accuracy: How well-defined the bbox is (0.0-1.0)
78
+ bbox_accuracy: How well-defined the bbox is (0.0-1.0)
79
79
 
80
80
  Returns:
81
81
  Confidence score between 0.0 and 1.0
@@ -46,17 +46,17 @@ def format_as_json(result: ExtractionResult) -> Dict[str, Any]:
46
46
  def format_as_markdown(result: ExtractionResult, clean: bool = False) -> str:
47
47
  """
48
48
  Format result as LLM-ready markdown.
49
-
49
+
50
50
  Args:
51
51
  result: ExtractionResult to format
52
52
  clean: If True, output only content without metadata (file paths, confidence scores, etc.)
53
53
  If False, include all metadata (default: False for backward compatibility)
54
-
54
+
55
55
  Returns:
56
56
  Markdown string
57
57
  """
58
58
  lines = []
59
-
59
+
60
60
  # If clean mode, skip all metadata and just output content
61
61
  if clean:
62
62
  return _format_as_clean_markdown(result)
@@ -191,14 +191,14 @@ def _format_as_clean_markdown(result: ExtractionResult) -> str:
191
191
  Perfect for LLM consumption - just the text content.
192
192
  """
193
193
  lines = []
194
-
194
+
195
195
  # Tables - just the table content
196
196
  if result.tables:
197
197
  for table in result.tables:
198
198
  table_md = _format_table_as_markdown(table)
199
199
  lines.append(table_md)
200
200
  lines.append("")
201
-
201
+
202
202
  # Forms - just field names and values
203
203
  if result.forms:
204
204
  for form in result.forms:
@@ -207,7 +207,7 @@ def _format_as_clean_markdown(result: ExtractionResult) -> str:
207
207
  elif form.value:
208
208
  lines.append(form.value)
209
209
  lines.append("")
210
-
210
+
211
211
  # Elements (text content) - main content
212
212
  if result.elements:
213
213
  # Group by page
@@ -217,11 +217,11 @@ def _format_as_clean_markdown(result: ExtractionResult) -> str:
217
217
  if page_num not in elements_by_page:
218
218
  elements_by_page[page_num] = []
219
219
  elements_by_page[page_num].append(elem)
220
-
220
+
221
221
  # Sort pages
222
222
  for page_num in sorted(elements_by_page.keys()):
223
223
  page_elements = elements_by_page[page_num]
224
-
224
+
225
225
  # Sort by reading order if available
226
226
  if result.reading_order:
227
227
  page_elements.sort(
@@ -231,7 +231,7 @@ def _format_as_clean_markdown(result: ExtractionResult) -> str:
231
231
  else 9999
232
232
  )
233
233
  )
234
-
234
+
235
235
  for elem in page_elements:
236
236
  if elem.element_type == ElementType.TITLE:
237
237
  lines.append(f"# {elem.text}")
@@ -249,7 +249,7 @@ def _format_as_clean_markdown(result: ExtractionResult) -> str:
249
249
  elif elem.text:
250
250
  lines.append(elem.text)
251
251
  lines.append("")
252
-
252
+
253
253
  return "\n".join(lines).strip()
254
254
 
255
255