natural-pdf 25.3.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. examples/__init__.py +3 -0
  2. examples/another_exclusion_example.py +20 -0
  3. examples/basic_usage.py +190 -0
  4. examples/boundary_exclusion_test.py +137 -0
  5. examples/boundary_inclusion_fix_test.py +157 -0
  6. examples/chainable_layout_example.py +70 -0
  7. examples/color_basic_test.py +49 -0
  8. examples/color_name_example.py +71 -0
  9. examples/color_test.py +62 -0
  10. examples/debug_ocr.py +91 -0
  11. examples/direct_ocr_test.py +148 -0
  12. examples/direct_paddle_test.py +99 -0
  13. examples/direct_qa_example.py +165 -0
  14. examples/document_layout_analysis.py +123 -0
  15. examples/document_qa_example.py +185 -0
  16. examples/exclusion_count_debug.py +128 -0
  17. examples/exclusion_debug.py +107 -0
  18. examples/exclusion_example.py +150 -0
  19. examples/exclusion_optimization_example.py +190 -0
  20. examples/extract_text_test.py +128 -0
  21. examples/font_aware_example.py +101 -0
  22. examples/font_variant_example.py +124 -0
  23. examples/footer_overlap_test.py +124 -0
  24. examples/highlight_all_example.py +82 -0
  25. examples/highlight_attributes_test.py +114 -0
  26. examples/highlight_confidence_display.py +122 -0
  27. examples/highlight_demo.py +110 -0
  28. examples/highlight_float_test.py +71 -0
  29. examples/highlight_test.py +147 -0
  30. examples/highlighting_example.py +123 -0
  31. examples/image_width_example.py +84 -0
  32. examples/improved_api_example.py +128 -0
  33. examples/layout_confidence_display_test.py +65 -0
  34. examples/layout_confidence_test.py +82 -0
  35. examples/layout_coordinate_debug.py +258 -0
  36. examples/layout_highlight_test.py +77 -0
  37. examples/logging_example.py +70 -0
  38. examples/ocr_comprehensive.py +193 -0
  39. examples/ocr_debug_example.py +87 -0
  40. examples/ocr_default_test.py +97 -0
  41. examples/ocr_engine_comparison.py +235 -0
  42. examples/ocr_example.py +89 -0
  43. examples/ocr_simplified_params.py +79 -0
  44. examples/ocr_visualization.py +102 -0
  45. examples/ocr_visualization_test.py +121 -0
  46. examples/paddle_layout_example.py +315 -0
  47. examples/paddle_layout_simple.py +74 -0
  48. examples/paddleocr_example.py +224 -0
  49. examples/page_collection_example.py +103 -0
  50. examples/polygon_highlight_example.py +83 -0
  51. examples/position_methods_example.py +134 -0
  52. examples/region_boundary_test.py +73 -0
  53. examples/region_exclusion_test.py +149 -0
  54. examples/region_expand_example.py +109 -0
  55. examples/region_image_example.py +116 -0
  56. examples/region_ocr_test.py +119 -0
  57. examples/region_sections_example.py +115 -0
  58. examples/school_books.py +49 -0
  59. examples/school_books_all.py +52 -0
  60. examples/scouring.py +36 -0
  61. examples/section_extraction_example.py +232 -0
  62. examples/simple_document_qa.py +97 -0
  63. examples/spatial_navigation_example.py +108 -0
  64. examples/table_extraction_example.py +135 -0
  65. examples/table_structure_detection.py +155 -0
  66. examples/tatr_cells_test.py +56 -0
  67. examples/tatr_ocr_table_test.py +94 -0
  68. examples/text_search_example.py +122 -0
  69. examples/text_style_example.py +110 -0
  70. examples/tiny-text.py +61 -0
  71. examples/until_boundaries_example.py +156 -0
  72. examples/until_example.py +112 -0
  73. examples/very_basics.py +15 -0
  74. natural_pdf/__init__.py +55 -0
  75. natural_pdf/analyzers/__init__.py +9 -0
  76. natural_pdf/analyzers/document_layout.py +736 -0
  77. natural_pdf/analyzers/text_structure.py +153 -0
  78. natural_pdf/core/__init__.py +3 -0
  79. natural_pdf/core/page.py +2376 -0
  80. natural_pdf/core/pdf.py +572 -0
  81. natural_pdf/elements/__init__.py +3 -0
  82. natural_pdf/elements/base.py +553 -0
  83. natural_pdf/elements/collections.py +770 -0
  84. natural_pdf/elements/line.py +124 -0
  85. natural_pdf/elements/rect.py +122 -0
  86. natural_pdf/elements/region.py +1366 -0
  87. natural_pdf/elements/text.py +304 -0
  88. natural_pdf/ocr/__init__.py +62 -0
  89. natural_pdf/ocr/easyocr_engine.py +254 -0
  90. natural_pdf/ocr/engine.py +158 -0
  91. natural_pdf/ocr/paddleocr_engine.py +263 -0
  92. natural_pdf/qa/__init__.py +3 -0
  93. natural_pdf/qa/document_qa.py +405 -0
  94. natural_pdf/selectors/__init__.py +4 -0
  95. natural_pdf/selectors/parser.py +360 -0
  96. natural_pdf/templates/__init__.py +1 -0
  97. natural_pdf/templates/ocr_debug.html +517 -0
  98. natural_pdf/utils/__init__.py +4 -0
  99. natural_pdf/utils/highlighting.py +605 -0
  100. natural_pdf/utils/ocr.py +515 -0
  101. natural_pdf/utils/reading_order.py +227 -0
  102. natural_pdf/utils/visualization.py +151 -0
  103. natural_pdf-25.3.16.dist-info/LICENSE +21 -0
  104. natural_pdf-25.3.16.dist-info/METADATA +268 -0
  105. natural_pdf-25.3.16.dist-info/RECORD +109 -0
  106. natural_pdf-25.3.16.dist-info/WHEEL +5 -0
  107. natural_pdf-25.3.16.dist-info/top_level.txt +3 -0
  108. tests/__init__.py +3 -0
  109. tests/test_pdf.py +39 -0
@@ -0,0 +1,517 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>OCR Debug Report</title>
7
+ <style>
8
+ body {{
9
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
10
+ line-height: 1.6;
11
+ color: #333;
12
+ max-width: 1200px;
13
+ margin: 0 auto;
14
+ padding: 20px;
15
+ }}
16
+ h1, h2 {{
17
+ color: #2c3e50;
18
+ }}
19
+ .page-section {{
20
+ margin-bottom: 30px;
21
+ border: 1px solid #ddd;
22
+ border-radius: 4px;
23
+ padding: 15px;
24
+ background: #f8f9fa;
25
+ }}
26
+ .page-title {{
27
+ display: flex;
28
+ justify-content: space-between;
29
+ align-items: center;
30
+ margin-bottom: 15px;
31
+ }}
32
+ .page-controls {{
33
+ display: flex;
34
+ gap: 10px;
35
+ align-items: center;
36
+ }}
37
+ .controls {{
38
+ margin-bottom: 20px;
39
+ display: flex;
40
+ justify-content: space-between;
41
+ flex-wrap: wrap;
42
+ gap: 10px;
43
+ padding: 10px;
44
+ background: #f0f0f0;
45
+ border-radius: 4px;
46
+ }}
47
+ .filter-control {{
48
+ display: flex;
49
+ align-items: center;
50
+ gap: 8px;
51
+ }}
52
+ input, select, button {{
53
+ padding: 6px 12px;
54
+ border: 1px solid #ccc;
55
+ border-radius: 4px;
56
+ font-size: 14px;
57
+ }}
58
+ button {{
59
+ background: #4b6bfb;
60
+ color: white;
61
+ cursor: pointer;
62
+ }}
63
+ button:hover {{
64
+ background: #3b5de7;
65
+ }}
66
+ .region-table {{
67
+ width: 100%;
68
+ border-collapse: collapse;
69
+ }}
70
+ .region-table th,
71
+ .region-table td {{
72
+ padding: 8px;
73
+ border: 1px solid #ddd;
74
+ text-align: left;
75
+ vertical-align: top;
76
+ }}
77
+ .region-table th {{
78
+ background: #f2f2f2;
79
+ position: sticky;
80
+ top: 0;
81
+ z-index: 10;
82
+ }}
83
+ .region-image {{
84
+ width: 35%; /* Limit image cell width */
85
+ }}
86
+ .image-clip {{
87
+ position: relative;
88
+ overflow: hidden;
89
+ background-repeat: no-repeat;
90
+ border-radius: 3px;
91
+ box-shadow: 0 1px 3px rgba(0,0,0,0.2);
92
+ margin: 0 auto;
93
+ max-width: 350px; /* Maximum width */
94
+ max-height: 250px; /* Maximum height */
95
+ transform-origin: top left; /* For scaling */
96
+ }}
97
+ .confidence {{
98
+ width: 90px;
99
+ text-align: center;
100
+ white-space: nowrap;
101
+ }}
102
+ .confidence[data-level="high"] {{
103
+ background-color: rgba(0, 200, 0, 0.2);
104
+ }}
105
+ .confidence[data-level="medium"] {{
106
+ background-color: rgba(255, 200, 0, 0.2);
107
+ }}
108
+ .confidence[data-level="low"] {{
109
+ background-color: rgba(255, 0, 0, 0.2);
110
+ }}
111
+ .text-content {{
112
+ width: 60%;
113
+ }}
114
+ .text-content-input {{
115
+ width: 100%;
116
+ font-family: monospace;
117
+ padding: 8px;
118
+ line-height: 1.4;
119
+ white-space: pre-wrap;
120
+ word-break: break-all;
121
+ border: 1px solid #ddd;
122
+ border-radius: 4px;
123
+ resize: vertical;
124
+ }}
125
+ .text-content-input:focus {{
126
+ border-color: #4b6bfb;
127
+ outline: none;
128
+ box-shadow: 0 0 0 2px rgba(75, 107, 251, 0.25);
129
+ }}
130
+ .modified-status {{
131
+ text-align: center;
132
+ width: 80px;
133
+ }}
134
+ .modified-checkbox {{
135
+ width: 18px;
136
+ height: 18px;
137
+ cursor: not-allowed;
138
+ }}
139
+ .hidden {{
140
+ display: none;
141
+ }}
142
+ .toggle-btn {{
143
+ background: #eee;
144
+ color: #333;
145
+ border: 1px solid #ccc;
146
+ padding: 3px 8px;
147
+ border-radius: 3px;
148
+ cursor: pointer;
149
+ font-size: 12px;
150
+ }}
151
+ .toggle-btn:hover {{
152
+ background: #ddd;
153
+ }}
154
+ .export-btn {{
155
+ margin-left: auto;
156
+ }}
157
+ .page-image {{
158
+ max-width: 100%;
159
+ height: auto;
160
+ margin-bottom: 15px;
161
+ border: 1px solid #ddd;
162
+ display: none;
163
+ }}
164
+ .show {{
165
+ display: block;
166
+ }}
167
+
168
+ @media (max-width: 800px) {{
169
+ .region-table, .region-table tbody, .region-table tr, .region-table td, .region-table th {{
170
+ display: block;
171
+ }}
172
+ .region-table td {{
173
+ margin-bottom: 8px;
174
+ }}
175
+ .region-table th {{
176
+ position: static;
177
+ }}
178
+ }}
179
+ </style>
180
+ </head>
181
+ <body>
182
+ <h1>OCR Debug Report</h1>
183
+
184
+ <div class="controls">
185
+ <div class="filter-control">
186
+ <label for="confidence-filter">Min Confidence:</label>
187
+ <input type="range" id="confidence-filter" min="0" max="1" step="0.05" value="0">
188
+ <span id="confidence-value">0</span>
189
+ </div>
190
+
191
+ <div class="filter-control">
192
+ <label for="text-filter">Text Filter:</label>
193
+ <input type="text" id="text-filter" placeholder="Filter text...">
194
+ </div>
195
+
196
+ <div class="filter-control">
197
+ <label for="sort-by">Sort By:</label>
198
+ <select id="sort-by">
199
+ <option value="position">Position (default)</option>
200
+ <option value="confidence-asc">Confidence (Low to High)</option>
201
+ <option value="confidence-desc">Confidence (High to Low)</option>
202
+ <option value="text-length">Text Length</option>
203
+ </select>
204
+ </div>
205
+
206
+ <button id="export-json" class="export-btn">Export JSON</button>
207
+ </div>
208
+
209
+ <div id="pages-container">
210
+ <!-- Pages will be inserted here -->
211
+ </div>
212
+
213
+ <script>
214
+ // OCR data for the report; the code below reads ocrData.pages[i].regions[j]. NOTE(review): {pages_data} is the only single-brace token in this script, so it is presumably a placeholder filled in by Python str.format — confirm against the caller
215
+ const ocrData = {pages_data};
216
+
217
+ // ===== DOM Elements (looked up once by the ids declared in the markup above) =====
218
+ const pagesContainer = document.getElementById('pages-container');
219
+ const confidenceFilter = document.getElementById('confidence-filter');
220
+ const confidenceValue = document.getElementById('confidence-value');
221
+ const textFilter = document.getElementById('text-filter');
222
+ const sortBySelect = document.getElementById('sort-by');
223
+ const exportButton = document.getElementById('export-json');
224
+
225
+ // ===== Rendering Functions =====
226
+
227
+ // Build and return the DOM section for one page: a title bar with a full-image toggle, the full page image, and a table with one row per entry in page.regions
228
+ function renderPage(page, pageIndex) {{
229
+ const pageDiv = document.createElement('div');
230
+ pageDiv.className = 'page-section';
231
+ pageDiv.id = `page-${{pageIndex}}`;
232
+
233
+ // Page header: heading plus a controls container holding the toggle button
234
+ const pageTitle = document.createElement('div');
235
+ pageTitle.className = 'page-title';
236
+
237
+ const pageHeading = document.createElement('h2');
238
+ pageHeading.textContent = `Page ${{page.page_number}}`;
239
+ pageTitle.appendChild(pageHeading);
240
+
241
+ const pageControls = document.createElement('div');
242
+ pageControls.className = 'page-controls';
243
+
244
+ const toggleImageBtn = document.createElement('button');
245
+ toggleImageBtn.className = 'toggle-btn';
246
+ toggleImageBtn.textContent = 'Show Full Image';
247
+ toggleImageBtn.onclick = () => toggleFullImage(pageIndex);
248
+ pageControls.appendChild(toggleImageBtn);
249
+
250
+ pageTitle.appendChild(pageControls);
251
+ pageDiv.appendChild(pageTitle);
252
+
253
+ // Full page image: .page-image is display: none until the 'show' class is added; it is appended right after the title bar, and toggleFullImage relies on that sibling order
254
+ const pageImage = document.createElement('img');
255
+ pageImage.src = page.image;
256
+ pageImage.className = 'page-image';
257
+ pageImage.id = `page-image-${{pageIndex}}`;
258
+ pageImage.alt = `Page ${{page.page_number}}`;
259
+ pageDiv.appendChild(pageImage);
260
+
261
+ // Table for regions
262
+ const table = document.createElement('table');
263
+ table.className = 'region-table';
264
+
265
+ // Table header: one <th> per column, in the same order renderRegionRow appends its cells
266
+ const thead = document.createElement('thead');
267
+ const headerRow = document.createElement('tr');
268
+
269
+ const headers = ['Confidence', 'Text Region', 'Text Content'];
270
+ headers.forEach(header => {{
271
+ const th = document.createElement('th');
272
+ th.textContent = header;
273
+ headerRow.appendChild(th);
274
+ }});
275
+
276
+ thead.appendChild(headerRow);
277
+ table.appendChild(thead);
278
+
279
+ // Table body; its id (regions-<pageIndex>) is how sortRegions finds the rows of this page
280
+ const tbody = document.createElement('tbody');
281
+ tbody.id = `regions-${{pageIndex}}`;
282
+
283
+ // Render each region row; page.image is passed along so every row can crop its region from the same image
284
+ page.regions.forEach((region, regionIndex) => {{
285
+ const row = renderRegionRow(region, pageIndex, regionIndex, page.image);
286
+ tbody.appendChild(row);
287
+ }});
288
+
289
+ table.appendChild(tbody);
290
+ pageDiv.appendChild(table);
291
+
292
+ return pageDiv;
293
+ }}
294
+
295
+ // Build and return the <tr> for one OCR region: confidence cell, cropped image cell, and an editable textarea; data-confidence, data-text and data-region-id are read later by the filter and sort functions
296
+ function renderRegionRow(region, pageIndex, regionIndex, pageImage) {{
297
+ const row = document.createElement('tr');
298
+ row.className = 'region-row';
299
+ row.dataset.confidence = region.confidence;
300
+ row.dataset.text = region.ocr_text;
301
+ row.dataset.modified = (region.modified || false).toString();
302
+ row.dataset.regionId = `${{pageIndex}}-${{regionIndex}}`;
303
+
304
+ // Confidence cell (value shown with two decimals)
305
+ const confidenceCell = document.createElement('td');
306
+ confidenceCell.className = 'confidence';
307
+ confidenceCell.textContent = region.confidence.toFixed(2);
308
+
309
+ // Set color level based on confidence: >= 0.8 is high, >= 0.5 is medium, anything else is low (styled by the .confidence[data-level] rules)
310
+ if (region.confidence >= 0.8) {{
311
+ confidenceCell.dataset.level = 'high';
312
+ }} else if (region.confidence >= 0.5) {{
313
+ confidenceCell.dataset.level = 'medium';
314
+ }} else {{
315
+ confidenceCell.dataset.level = 'low';
316
+ }}
317
+
318
+ row.appendChild(confidenceCell);
319
+
320
+ // Image region cell: a div that shows the page image as a CSS background, offset so only this region is visible
321
+ const imageCell = document.createElement('td');
322
+ imageCell.className = 'region-image';
323
+
324
+ const imageClip = document.createElement('div');
325
+ imageClip.className = 'image-clip';
326
+ imageClip.style.backgroundImage = `url('${{pageImage}}')`;
327
+
328
+ // Calculate dimensions from bbox [x0, y0, x1, y1], scaled by 2.0 to match the image scale (assumes the page image is rendered at 2x the bbox units — TODO confirm against the generator)
329
+ const width = (region.bbox[2] - region.bbox[0]) * 2.0;
330
+ const height = (region.bbox[3] - region.bbox[1]) * 2.0;
331
+
332
+ // Calculate background position (negative of the top-left corner, in the same 2x pixel space)
333
+ imageClip.style.backgroundPosition = `-${{region.bbox[0] * 2.0}}px -${{region.bbox[1] * 2.0}}px`;
334
+
335
+ // If the region is larger than 350x250, shrink it with a CSS transform instead of changing the dimensions, so the background position stays valid.
336
+ // NOTE(review): the .image-clip rule also sets max-width: 350px / max-height: 250px, which may clamp the box before the transform applies — confirm large regions render fully
337
+ const maxWidth = 350;
338
+ const maxHeight = 250;
339
+ let scale = 1;
340
+
341
+ if (width > maxWidth || height > maxHeight) {{
342
+ const scaleX = maxWidth / width;
343
+ const scaleY = maxHeight / height;
344
+ scale = Math.min(scaleX, scaleY);
345
+ imageClip.style.transform = `scale(${{scale}})`;
346
+ }}
347
+
348
+ // Set the final (unscaled) dimensions; any shrinking comes only from the transform above
349
+ imageClip.style.width = `${{width}}px`;
350
+ imageClip.style.height = `${{height}}px`;
351
+
352
+ imageCell.appendChild(imageClip);
353
+ row.appendChild(imageCell);
354
+
355
+ // Text content cell: a textarea pre-filled with the OCR text that remembers its page/region indices and original text
356
+ const textCell = document.createElement('td');
357
+ textCell.className = 'text-content';
358
+
359
+ const textArea = document.createElement('textarea');
360
+ textArea.className = 'text-content-input';
361
+ textArea.value = region.ocr_text;
362
+ textArea.rows = Math.max(1, Math.ceil(region.ocr_text.length / 40)); // Approximate rows based on text length
363
+ textArea.dataset.pageIndex = pageIndex;
364
+ textArea.dataset.regionIndex = regionIndex;
365
+ textArea.dataset.originalText = region.ocr_text;
366
+
367
+ // On change, store the edited text as corrected_text on the matching ocrData region (ocr_text itself is left untouched)
368
+ textArea.addEventListener('change', (e) => {{
369
+ const pIdx = parseInt(e.target.dataset.pageIndex);
370
+ const rIdx = parseInt(e.target.dataset.regionIndex);
371
+ ocrData.pages[pIdx].regions[rIdx].corrected_text = e.target.value;
372
+
373
+ // Record on the ocrData region whether the text now differs from the original OCR text (row.dataset.modified is not updated here)
374
+ const isModified = e.target.value !== e.target.dataset.originalText;
375
+ ocrData.pages[pIdx].regions[rIdx].modified = isModified;
376
+
377
+ // Visual indication of modification through textarea style; reverting to the original text restores the default look
378
+ if (isModified) {{
379
+ e.target.style.borderColor = '#4b6bfb';
380
+ e.target.style.backgroundColor = 'rgba(75, 107, 251, 0.05)';
381
+ }} else {{
382
+ e.target.style.borderColor = '#ddd';
383
+ e.target.style.backgroundColor = '';
384
+ }}
385
+ }});
386
+
387
+ textCell.appendChild(textArea);
388
+ row.appendChild(textCell);
389
+
390
+ // No separate "Modified" column is rendered; modification is shown only through the textarea styling above
391
+
392
+ return row;
393
+ }}
394
+
395
+ // ===== Interactive Functions =====
396
+
397
+ // Toggle display of full page image
398
+ function toggleFullImage(pageIndex) {{
399
+ const image = document.getElementById(`page-image-${{pageIndex}}`);
400
+ const button = image.previousElementSibling.querySelector('.toggle-btn');
401
+
402
+ if (image.classList.contains('show')) {{
403
+ image.classList.remove('show');
404
+ button.textContent = 'Show Full Image';
405
+ }} else {{
406
+ image.classList.add('show');
407
+ button.textContent = 'Hide Full Image';
408
+ }}
409
+ }}
410
+
411
+ // Filter regions by confidence
412
+ function filterByConfidence(minConfidence) {{
413
+ document.querySelectorAll('.region-row').forEach(row => {{
414
+ const confidence = parseFloat(row.dataset.confidence);
415
+ if (confidence < minConfidence) {{
416
+ row.classList.add('hidden');
417
+ }} else {{
418
+ row.classList.remove('hidden');
419
+ }}
420
+ }});
421
+ }}
422
+
423
+ // Filter regions by text content
424
+ function filterByText(text) {{
425
+ const searchText = text.toLowerCase();
426
+ // If no search text, we don't need to do anything
427
+ if (!searchText) {{
428
+ document.querySelectorAll('.region-row').forEach(row => {{
429
+ row.classList.remove('hidden');
430
+ }});
431
+ return;
432
+ }}
433
+
434
+ // Filter based on current textarea content (not just original text)
435
+ document.querySelectorAll('.region-row').forEach(row => {{
436
+ const textarea = row.querySelector('.text-content-input');
437
+ const currentText = textarea ? textarea.value.toLowerCase() : row.dataset.text.toLowerCase();
438
+
439
+ if (!currentText.includes(searchText)) {{
440
+ row.classList.add('hidden');
441
+ }} else {{
442
+ row.classList.remove('hidden');
443
+ }}
444
+ }});
445
+ }}
446
+
447
+ // Sort regions by different criteria
448
+ function sortRegions(sortBy) {{
449
+ ocrData.pages.forEach((page, pageIndex) => {{
450
+ const tbody = document.getElementById(`regions-${{pageIndex}}`);
451
+ const rows = Array.from(tbody.querySelectorAll('.region-row'));
452
+
453
+ // Sort based on selected criterion
454
+ rows.sort((a, b) => {{
455
+ switch (sortBy) {{
456
+ case 'confidence-asc':
457
+ return parseFloat(a.dataset.confidence) - parseFloat(b.dataset.confidence);
458
+ case 'confidence-desc':
459
+ return parseFloat(b.dataset.confidence) - parseFloat(a.dataset.confidence);
460
+ case 'text-length':
461
+ return b.dataset.text.length - a.dataset.text.length;
462
+ case 'position':
463
+ default:
464
+ // Default sort by region ID (original position)
465
+ return a.dataset.regionId.localeCompare(b.dataset.regionId);
466
+ }}
467
+ }});
468
+
469
+ // Reinsert in sorted order
470
+ rows.forEach(row => tbody.appendChild(row));
471
+ }});
472
+ }}
473
+
474
+ // Serialize ocrData (including any corrected_text / modified fields written by the textareas) and trigger a browser download named ocr_debug_export.json
475
+ function exportJSON() {{
476
+ // Pretty-print with 2-space indentation and wrap the text in a Blob so it can be downloaded through an object URL
477
+ const exportData = JSON.stringify(ocrData, null, 2);
478
+ const blob = new Blob([exportData], {{type: 'application/json'}});
479
+ const url = URL.createObjectURL(blob);
480
+
481
+ const a = document.createElement('a');
482
+ a.href = url;
483
+ a.download = 'ocr_debug_export.json';
484
+ document.body.appendChild(a);
485
+ a.click();
486
+ document.body.removeChild(a);
487
+ URL.revokeObjectURL(url);
488
+ }}
489
+
490
+ // ===== Event Listeners (wire the controls to the filter, sort and export functions) =====
491
+
492
+ confidenceFilter.addEventListener('input', (e) => {{
493
+ const value = parseFloat(e.target.value);
494
+ confidenceValue.textContent = value.toFixed(2);
495
+ filterByConfidence(value);
496
+ }});
497
+
498
+ textFilter.addEventListener('input', (e) => {{
499
+ filterByText(e.target.value);
500
+ }});
501
+
502
+ sortBySelect.addEventListener('change', (e) => {{
503
+ sortRegions(e.target.value);
504
+ }});
505
+
506
+ exportButton.addEventListener('click', exportJSON);
507
+
508
+ // ===== Initialize (runs once when the script is evaluated) =====
509
+
510
+ // Render all pages in array order; the array index becomes the pageIndex used in element ids
511
+ ocrData.pages.forEach((page, i) => {{
512
+ const pageElement = renderPage(page, i);
513
+ pagesContainer.appendChild(pageElement);
514
+ }});
515
+ </script>
516
+ </body>
517
+ </html>
@@ -0,0 +1,4 @@
1
+ """
2
+ Utility functions for natural-pdf.
3
+ """
4
+ from natural_pdf.utils.ocr import OCRManager