natural-pdf 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. docs/categorizing-documents/index.md +168 -0
  2. docs/data-extraction/index.md +87 -0
  3. docs/element-selection/index.ipynb +218 -164
  4. docs/element-selection/index.md +20 -0
  5. docs/index.md +19 -0
  6. docs/ocr/index.md +63 -16
  7. docs/tutorials/01-loading-and-extraction.ipynb +1713 -34
  8. docs/tutorials/02-finding-elements.ipynb +123 -46
  9. docs/tutorials/03-extracting-blocks.ipynb +24 -19
  10. docs/tutorials/04-table-extraction.ipynb +17 -12
  11. docs/tutorials/05-excluding-content.ipynb +37 -32
  12. docs/tutorials/06-document-qa.ipynb +36 -31
  13. docs/tutorials/07-layout-analysis.ipynb +45 -40
  14. docs/tutorials/07-working-with-regions.ipynb +61 -60
  15. docs/tutorials/08-spatial-navigation.ipynb +76 -71
  16. docs/tutorials/09-section-extraction.ipynb +160 -155
  17. docs/tutorials/10-form-field-extraction.ipynb +71 -66
  18. docs/tutorials/11-enhanced-table-processing.ipynb +11 -6
  19. docs/tutorials/12-ocr-integration.ipynb +3420 -312
  20. docs/tutorials/12-ocr-integration.md +68 -106
  21. docs/tutorials/13-semantic-search.ipynb +641 -251
  22. natural_pdf/__init__.py +2 -0
  23. natural_pdf/classification/manager.py +343 -0
  24. natural_pdf/classification/mixin.py +149 -0
  25. natural_pdf/classification/results.py +62 -0
  26. natural_pdf/collections/mixins.py +63 -0
  27. natural_pdf/collections/pdf_collection.py +321 -15
  28. natural_pdf/core/element_manager.py +67 -0
  29. natural_pdf/core/page.py +227 -64
  30. natural_pdf/core/pdf.py +387 -378
  31. natural_pdf/elements/collections.py +272 -41
  32. natural_pdf/elements/region.py +99 -15
  33. natural_pdf/elements/text.py +5 -2
  34. natural_pdf/exporters/paddleocr.py +1 -1
  35. natural_pdf/extraction/manager.py +134 -0
  36. natural_pdf/extraction/mixin.py +246 -0
  37. natural_pdf/extraction/result.py +37 -0
  38. natural_pdf/ocr/engine_easyocr.py +6 -3
  39. natural_pdf/ocr/ocr_manager.py +85 -25
  40. natural_pdf/ocr/ocr_options.py +33 -10
  41. natural_pdf/ocr/utils.py +14 -3
  42. natural_pdf/qa/document_qa.py +0 -4
  43. natural_pdf/selectors/parser.py +363 -238
  44. natural_pdf/templates/finetune/fine_tune_paddleocr.md +10 -5
  45. natural_pdf/utils/locks.py +8 -0
  46. natural_pdf/utils/text_extraction.py +52 -1
  47. natural_pdf/utils/tqdm_utils.py +43 -0
  48. {natural_pdf-0.1.7.dist-info → natural_pdf-0.1.8.dist-info}/METADATA +6 -1
  49. {natural_pdf-0.1.7.dist-info → natural_pdf-0.1.8.dist-info}/RECORD +52 -41
  50. {natural_pdf-0.1.7.dist-info → natural_pdf-0.1.8.dist-info}/WHEEL +1 -1
  51. {natural_pdf-0.1.7.dist-info → natural_pdf-0.1.8.dist-info}/licenses/LICENSE +0 -0
  52. {natural_pdf-0.1.7.dist-info → natural_pdf-0.1.8.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@
2
2
  "cells": [
3
3
  {
4
4
  "cell_type": "markdown",
5
- "id": "8b02fa9e",
5
+ "id": "96325b7e",
6
6
  "metadata": {},
7
7
  "source": [
8
8
  "# OCR Integration for Scanned Documents\n",
@@ -13,13 +13,13 @@
13
13
  {
14
14
  "cell_type": "code",
15
15
  "execution_count": 1,
16
- "id": "bde55ac1",
16
+ "id": "34cb9f5e",
17
17
  "metadata": {
18
18
  "execution": {
19
- "iopub.execute_input": "2025-04-21T21:32:06.104226Z",
20
- "iopub.status.busy": "2025-04-21T21:32:06.104019Z",
21
- "iopub.status.idle": "2025-04-21T21:32:06.108232Z",
22
- "shell.execute_reply": "2025-04-21T21:32:06.107754Z"
19
+ "iopub.execute_input": "2025-04-27T16:33:38.508832Z",
20
+ "iopub.status.busy": "2025-04-27T16:33:38.508415Z",
21
+ "iopub.status.idle": "2025-04-27T16:33:38.515643Z",
22
+ "shell.execute_reply": "2025-04-27T16:33:38.514609Z"
23
23
  }
24
24
  },
25
25
  "outputs": [],
@@ -30,13 +30,13 @@
30
30
  {
31
31
  "cell_type": "code",
32
32
  "execution_count": 2,
33
- "id": "5c624a53",
33
+ "id": "44287df0",
34
34
  "metadata": {
35
35
  "execution": {
36
- "iopub.execute_input": "2025-04-21T21:32:06.110125Z",
37
- "iopub.status.busy": "2025-04-21T21:32:06.109925Z",
38
- "iopub.status.idle": "2025-04-21T21:32:14.008764Z",
39
- "shell.execute_reply": "2025-04-21T21:32:14.008268Z"
36
+ "iopub.execute_input": "2025-04-27T16:33:38.518656Z",
37
+ "iopub.status.busy": "2025-04-27T16:33:38.518160Z",
38
+ "iopub.status.idle": "2025-04-27T16:33:44.450143Z",
39
+ "shell.execute_reply": "2025-04-27T16:33:44.449510Z"
40
40
  }
41
41
  },
42
42
  "outputs": [
@@ -65,458 +65,652 @@
65
65
  },
66
66
  {
67
67
  "cell_type": "markdown",
68
- "id": "461a5090",
68
+ "id": "6db0cfaf",
69
69
  "metadata": {},
70
70
  "source": [
71
- "## Finding Text Elements with OCR"
71
+ "## Applying OCR and Finding Elements\n",
72
+ "\n",
73
+ "The core method is `page.apply_ocr()`. This runs the OCR process and adds `TextElement` objects to the page. You can specify the engine and languages.\n",
74
+ "\n",
75
+ "**Note:** Re-applying OCR to the same page or region will automatically remove any previously generated OCR elements for that area before adding the new ones."
72
76
  ]
73
77
  },
74
78
  {
75
79
  "cell_type": "code",
76
80
  "execution_count": 3,
77
- "id": "895e3c2c",
81
+ "id": "129d2d88",
78
82
  "metadata": {
79
83
  "execution": {
80
- "iopub.execute_input": "2025-04-21T21:32:14.010745Z",
81
- "iopub.status.busy": "2025-04-21T21:32:14.010324Z",
82
- "iopub.status.idle": "2025-04-21T21:32:28.416856Z",
83
- "shell.execute_reply": "2025-04-21T21:32:28.416360Z"
84
+ "iopub.execute_input": "2025-04-27T16:33:44.453600Z",
85
+ "iopub.status.busy": "2025-04-27T16:33:44.452871Z",
86
+ "iopub.status.idle": "2025-04-27T16:34:08.411433Z",
87
+ "shell.execute_reply": "2025-04-27T16:34:08.411148Z"
84
88
  }
85
89
  },
86
90
  "outputs": [
91
+ {
92
+ "data": {
93
+ "application/vnd.jupyter.widget-view+json": {
94
+ "model_id": "cfbf78084dd04ad0b51e62e5b1bd0e14",
95
+ "version_major": 2,
96
+ "version_minor": 0
97
+ },
98
+ "text/plain": [
99
+ "Rendering pages: 0%| | 0/1 [00:00<?, ?it/s]"
100
+ ]
101
+ },
102
+ "metadata": {},
103
+ "output_type": "display_data"
104
+ },
87
105
  {
88
106
  "name": "stderr",
89
107
  "output_type": "stream",
90
108
  "text": [
91
- "\u001b[2m2025-04-21T21:32:14.064078Z\u001b[0m [\u001b[33m\u001b[1mwarning \u001b[0m] \u001b[1mUsing CPU. Note: This module is much faster with a GPU.\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m71\u001b[0m \u001b[36mmodule\u001b[0m=\u001b[35measyocr.easyocr\u001b[0m\n"
109
+ "\u001b[2m2025-04-27T16:33:44.559124Z\u001b[0m [\u001b[33m\u001b[1mwarning \u001b[0m] \u001b[1mUsing CPU. Note: This module is much faster with a GPU.\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m71\u001b[0m \u001b[36mmodule\u001b[0m=\u001b[35measyocr.easyocr\u001b[0m\n"
92
110
  ]
93
111
  },
94
112
  {
95
113
  "name": "stderr",
96
114
  "output_type": "stream",
97
115
  "text": [
98
- "[2025-04-21 17:32:14,064] [ WARNING] easyocr.py:71 - Using CPU. Note: This module is much faster with a GPU.\n"
116
+ "[2025-04-27 12:33:44,559] [ WARNING] easyocr.py:71 - Using CPU. Note: This module is much faster with a GPU.\n"
99
117
  ]
100
118
  },
101
119
  {
102
- "data": {
103
- "text/plain": [
104
- "<ElementCollection[TextElement](count=47)>"
105
- ]
106
- },
107
- "execution_count": 3,
108
- "metadata": {},
109
- "output_type": "execute_result"
110
- }
111
- ],
112
- "source": [
113
- "# Convert text-as-image to text elements\n",
114
- "page.apply_ocr()\n",
115
- "\n",
116
- "# Select all text pieces on the page\n",
117
- "text_elements = page.find_all('text')\n",
118
- "f\"Found {len(text_elements)} text elements\"\n",
119
- "\n",
120
- "# Visualize the elements\n",
121
- "text_elements.highlight()"
122
- ]
123
- },
124
- {
125
- "cell_type": "markdown",
126
- "id": "36051d57",
127
- "metadata": {},
128
- "source": [
129
- "## OCR Configuration Options"
130
- ]
131
- },
132
- {
133
- "cell_type": "code",
134
- "execution_count": 4,
135
- "id": "d4461746",
136
- "metadata": {
137
- "execution": {
138
- "iopub.execute_input": "2025-04-21T21:32:28.418763Z",
139
- "iopub.status.busy": "2025-04-21T21:32:28.418565Z",
140
- "iopub.status.idle": "2025-04-21T21:32:28.423024Z",
141
- "shell.execute_reply": "2025-04-21T21:32:28.422671Z"
142
- }
143
- },
144
- "outputs": [
120
+ "name": "stdout",
121
+ "output_type": "stream",
122
+ "text": [
123
+ "Found 47 text elements using default OCR\n"
124
+ ]
125
+ },
145
126
  {
146
127
  "data": {
128
+ "application/vnd.jupyter.widget-view+json": {
129
+ "model_id": "2610164fb3f7466985a46215b5b2bfa8",
130
+ "version_major": 2,
131
+ "version_minor": 0
132
+ },
147
133
  "text/plain": [
148
- "' \\n \\n ...'"
134
+ "Rendering pages: 0%| | 0/1 [00:00<?, ?it/s]"
149
135
  ]
150
136
  },
151
- "execution_count": 4,
152
137
  "metadata": {},
153
- "output_type": "execute_result"
154
- }
155
- ],
156
- "source": [
157
- "# Set OCR configuration for better results\n",
158
- "page.ocr_config = {\n",
159
- " 'language': 'eng', # English\n",
160
- " 'dpi': 300, # Higher resolution\n",
161
- "}\n",
162
- "\n",
163
- "# Extract text with the improved configuration\n",
164
- "improved_text = page.extract_text()\n",
165
- "\n",
166
- "# Preview the text\n",
167
- "improved_text[:200] + \"...\" if len(improved_text) > 200 else improved_text"
168
- ]
169
- },
170
- {
171
- "cell_type": "markdown",
172
- "id": "d5a96ac7",
173
- "metadata": {},
174
- "source": [
175
- "## Working with Multi-language Documents"
176
- ]
177
- },
178
- {
179
- "cell_type": "code",
180
- "execution_count": 5,
181
- "id": "9fa156f5",
182
- "metadata": {
183
- "execution": {
184
- "iopub.execute_input": "2025-04-21T21:32:28.424374Z",
185
- "iopub.status.busy": "2025-04-21T21:32:28.424235Z",
186
- "iopub.status.idle": "2025-04-21T21:32:28.428114Z",
187
- "shell.execute_reply": "2025-04-21T21:32:28.427816Z"
188
- }
189
- },
190
- "outputs": [
138
+ "output_type": "display_data"
139
+ },
140
+ {
141
+ "name": "stdout",
142
+ "output_type": "stream",
143
+ "text": [
144
+ "[2025/04/27 12:33:56] ppocr WARNING: Since the angle classifier is not initialized, it will not be used during the forward process\n"
145
+ ]
146
+ },
191
147
  {
192
148
  "data": {
149
+ "application/vnd.jupyter.widget-view+json": {
150
+ "model_id": "5e40bef048f3441c99ae91260a13f545",
151
+ "version_major": 2,
152
+ "version_minor": 0
153
+ },
193
154
  "text/plain": [
194
- "' \\n \\n '"
155
+ "Rendering pages: 0%| | 0/1 [00:00<?, ?it/s]"
195
156
  ]
196
157
  },
197
- "execution_count": 5,
198
158
  "metadata": {},
199
- "output_type": "execute_result"
200
- }
201
- ],
202
- "source": [
203
- "# Configure for multiple languages\n",
204
- "page.ocr_config = {\n",
205
- " 'language': 'eng+fra+deu', # English, French, German\n",
206
- " 'dpi': 300\n",
207
- "}\n",
208
- "\n",
209
- "# Extract text with multi-language support\n",
210
- "multilang_text = page.extract_text()\n",
211
- "multilang_text[:200]"
212
- ]
213
- },
214
- {
215
- "cell_type": "markdown",
216
- "id": "d3ccf43f",
217
- "metadata": {},
218
- "source": [
219
- "## Extracting Tables from Scanned Documents"
220
- ]
221
- },
222
- {
223
- "cell_type": "code",
224
- "execution_count": 6,
225
- "id": "ee7a7e7d",
226
- "metadata": {
227
- "execution": {
228
- "iopub.execute_input": "2025-04-21T21:32:28.429414Z",
229
- "iopub.status.busy": "2025-04-21T21:32:28.429283Z",
230
- "iopub.status.idle": "2025-04-21T21:32:30.754086Z",
231
- "shell.execute_reply": "2025-04-21T21:32:30.753700Z"
232
- }
233
- },
234
- "outputs": [
159
+ "output_type": "display_data"
160
+ },
235
161
  {
236
- "name": "stderr",
162
+ "name": "stdout",
237
163
  "output_type": "stream",
238
164
  "text": [
239
- "\u001b[2m2025-04-21T21:32:28.446098Z\u001b[0m [\u001b[33m\u001b[1mwarning \u001b[0m] \u001b[1mGOOGLE_API_KEY environment variable not set. Gemini detector (via OpenAI lib) will not be available.\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m72\u001b[0m \u001b[36mmodule\u001b[0m=\u001b[35mnatural_pdf.analyzers.layout.gemini\u001b[0m\n"
165
+ "Loaded detection model s3://text_detection/2025_02_18 on device mps with dtype torch.float16\n"
240
166
  ]
241
167
  },
242
168
  {
243
- "name": "stderr",
169
+ "name": "stdout",
244
170
  "output_type": "stream",
245
171
  "text": [
246
- "[2025-04-21 17:32:28,446] [ WARNING] gemini.py:72 - GOOGLE_API_KEY environment variable not set. Gemini detector (via OpenAI lib) will not be available.\n"
172
+ "Loaded recognition model s3://text_recognition/2025_02_18 on device mps with dtype torch.float16\n"
247
173
  ]
248
174
  },
175
+ {
176
+ "data": {
177
+ "text/html": [
178
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">╭─────────────────────────────── </span><span style=\"color: #800000; text-decoration-color: #800000; font-weight: bold\">Traceback </span><span style=\"color: #bf7f7f; text-decoration-color: #bf7f7f; font-weight: bold\">(most recent call last)</span><span style=\"color: #800000; text-decoration-color: #800000\"> ────────────────────────────────╮</span>\n",
179
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/Users/soma/Development/natural-pdf/natural_pdf/ocr/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">ocr_manager.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">195</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">apply_ocr</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
180
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
181
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">192 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ │ </span>logger.debug(<span style=\"color: #808000; text-decoration-color: #808000\">f\"[{</span>thread_id<span style=\"color: #808000; text-decoration-color: #808000\">}] Acquired inference lock for {</span>selected_engin <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
182
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">193 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ │ </span>inference_start_time = time.monotonic() <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
183
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">194 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ │ </span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
184
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>195 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ │ </span>results = engine_instance.process_image( <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
185
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">196 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ │ │ </span>images=images, <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
186
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">197 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ │ │ </span>languages=languages, <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
187
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">198 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ │ │ </span>min_confidence=min_confidence, <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
188
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
189
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">╭─────────────────────────────────────────── locals ───────────────────────────────────────────╮</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
190
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> detect_only = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">False</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
191
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> device = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
192
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> e = <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">AssertionError</span><span style=\"font-weight: bold\">(</span><span style=\"color: #808000; text-decoration-color: #808000\">'You need to pass in one list of languages for each</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
193
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">image'</span><span style=\"font-weight: bold\">)</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
194
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> engine = <span style=\"color: #808000; text-decoration-color: #808000\">'surya'</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
195
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> engine_instance = <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">natural_pdf.ocr.engine_surya.SuryaOCREngine</span><span style=\"color: #000000; text-decoration-color: #000000\"> object at </span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
196
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #0000ff; text-decoration-color: #0000ff\">0x35512cfd0</span><span style=\"font-weight: bold\">&gt;</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
197
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> final_options = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
198
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> image_dims = <span style=\"font-weight: bold\">[</span><span style=\"color: #808000; text-decoration-color: #808000\">'1275x1651'</span><span style=\"font-weight: bold\">]</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
199
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> images = <span style=\"font-weight: bold\">[&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">PIL.Image.Image</span><span style=\"color: #000000; text-decoration-color: #000000\"> image </span><span style=\"color: #808000; text-decoration-color: #808000\">mode</span><span style=\"color: #000000; text-decoration-color: #000000\">=</span><span style=\"color: #800080; text-decoration-color: #800080\">RGB</span><span style=\"color: #000000; text-decoration-color: #000000\"> </span><span style=\"color: #808000; text-decoration-color: #808000\">size</span><span style=\"color: #000000; text-decoration-color: #000000\">=</span><span style=\"color: #800080; text-decoration-color: #800080\">1275x1651</span><span style=\"color: #000000; text-decoration-color: #000000\"> at </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">0x35512CCD0</span><span style=\"font-weight: bold\">&gt;]</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
200
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> inference_acquired_time = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">272326.616208958</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
201
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> inference_lock = <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">unlocked</span><span style=\"color: #000000; text-decoration-color: #000000\"> _thread.lock object at </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">0x35511eec0</span><span style=\"font-weight: bold\">&gt;</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
202
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> inference_start_time = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">272326.616209666</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
203
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> inference_wait_start = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">272326.616208708</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
204
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> is_batch = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">True</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
205
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> languages = <span style=\"font-weight: bold\">[</span><span style=\"color: #808000; text-decoration-color: #808000\">'en'</span>, <span style=\"color: #808000; text-decoration-color: #808000\">'de'</span><span style=\"font-weight: bold\">]</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
206
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> min_confidence = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
207
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> options = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
208
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> processing_mode = <span style=\"color: #808000; text-decoration-color: #808000\">'batch'</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
209
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> selected_engine_name = <span style=\"color: #808000; text-decoration-color: #808000\">'surya'</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
210
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> self = <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">natural_pdf.ocr.ocr_manager.OCRManager</span><span style=\"color: #000000; text-decoration-color: #000000\"> object at </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">0x104beb580</span><span style=\"font-weight: bold\">&gt;</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
211
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> thread_id = <span style=\"color: #808000; text-decoration-color: #808000\">'MainThread'</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
212
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">╰──────────────────────────────────────────────────────────────────────────────────────────────╯</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
213
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
214
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/Users/soma/Development/natural-pdf/natural_pdf/ocr/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">engine.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">117</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">process_image</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
215
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
216
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">114 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ </span>processed_img = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._preprocess_image(img) <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
217
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">115 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ </span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
218
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">116 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"># Process the image with the engine-specific implementation</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
219
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span>117 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ </span>raw_results = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._process_single_image(processed_img, detect_only, options <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
220
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">118 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ </span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
221
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">119 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"># Convert results to standardized format</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
222
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">120 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ </span>text_regions = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._standardize_results(raw_results, effective_confidence, <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
223
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
224
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">╭────────────────────────────────────────── locals ──────────────────────────────────────────╮</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
225
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> detect_only = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">False</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
226
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> device = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
227
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> effective_confidence = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">0.2</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
228
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> effective_device = <span style=\"color: #808000; text-decoration-color: #808000\">'cpu'</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
229
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> effective_languages = <span style=\"font-weight: bold\">[</span><span style=\"color: #808000; text-decoration-color: #808000\">'en'</span>, <span style=\"color: #808000; text-decoration-color: #808000\">'de'</span><span style=\"font-weight: bold\">]</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
230
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> image_batch = <span style=\"font-weight: bold\">[&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">PIL.Image.Image</span><span style=\"color: #000000; text-decoration-color: #000000\"> image </span><span style=\"color: #808000; text-decoration-color: #808000\">mode</span><span style=\"color: #000000; text-decoration-color: #000000\">=</span><span style=\"color: #800080; text-decoration-color: #800080\">RGB</span><span style=\"color: #000000; text-decoration-color: #000000\"> </span><span style=\"color: #808000; text-decoration-color: #808000\">size</span><span style=\"color: #000000; text-decoration-color: #000000\">=</span><span style=\"color: #800080; text-decoration-color: #800080\">1275x1651</span><span style=\"color: #000000; text-decoration-color: #000000\"> at </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">0x35512CCD0</span><span style=\"font-weight: bold\">&gt;]</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
231
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> images = <span style=\"font-weight: bold\">[&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">PIL.Image.Image</span><span style=\"color: #000000; text-decoration-color: #000000\"> image </span><span style=\"color: #808000; text-decoration-color: #808000\">mode</span><span style=\"color: #000000; text-decoration-color: #000000\">=</span><span style=\"color: #800080; text-decoration-color: #800080\">RGB</span><span style=\"color: #000000; text-decoration-color: #000000\"> </span><span style=\"color: #808000; text-decoration-color: #808000\">size</span><span style=\"color: #000000; text-decoration-color: #000000\">=</span><span style=\"color: #800080; text-decoration-color: #800080\">1275x1651</span><span style=\"color: #000000; text-decoration-color: #000000\"> at </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">0x35512CCD0</span><span style=\"font-weight: bold\">&gt;]</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
232
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> img = <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">PIL.Image.Image</span><span style=\"color: #000000; text-decoration-color: #000000\"> image </span><span style=\"color: #808000; text-decoration-color: #808000\">mode</span><span style=\"color: #000000; text-decoration-color: #000000\">=</span><span style=\"color: #800080; text-decoration-color: #800080\">RGB</span><span style=\"color: #000000; text-decoration-color: #000000\"> </span><span style=\"color: #808000; text-decoration-color: #808000\">size</span><span style=\"color: #000000; text-decoration-color: #000000\">=</span><span style=\"color: #800080; text-decoration-color: #800080\">1275x1651</span><span style=\"color: #000000; text-decoration-color: #000000\"> at </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">0x35512CCD0</span><span style=\"font-weight: bold\">&gt;</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
233
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> languages = <span style=\"font-weight: bold\">[</span><span style=\"color: #808000; text-decoration-color: #808000\">'en'</span>, <span style=\"color: #808000; text-decoration-color: #808000\">'de'</span><span style=\"font-weight: bold\">]</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
234
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> min_confidence = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
235
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> options = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
236
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> processed_img = <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">PIL.Image.Image</span><span style=\"color: #000000; text-decoration-color: #000000\"> image </span><span style=\"color: #808000; text-decoration-color: #808000\">mode</span><span style=\"color: #000000; text-decoration-color: #000000\">=</span><span style=\"color: #800080; text-decoration-color: #800080\">RGB</span><span style=\"color: #000000; text-decoration-color: #000000\"> </span><span style=\"color: #808000; text-decoration-color: #808000\">size</span><span style=\"color: #000000; text-decoration-color: #000000\">=</span><span style=\"color: #800080; text-decoration-color: #800080\">1275x1651</span><span style=\"color: #000000; text-decoration-color: #000000\"> at </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">0x35512CCD0</span><span style=\"font-weight: bold\">&gt;</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
237
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> results = <span style=\"font-weight: bold\">[]</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
238
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> self = <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">natural_pdf.ocr.engine_surya.SuryaOCREngine</span><span style=\"color: #000000; text-decoration-color: #000000\"> object at </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">0x35512cfd0</span><span style=\"font-weight: bold\">&gt;</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
239
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> single_image = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">False</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
240
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">╰────────────────────────────────────────────────────────────────────────────────────────────╯</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
241
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
242
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/Users/soma/Development/natural-pdf/natural_pdf/ocr/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">engine_surya.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">71</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">_process_single_image</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
243
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
244
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 68 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> detect_only: <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
245
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 69 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ </span>results = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._detection_predictor(images=[image]) <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
246
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 70 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">else</span>: <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
247
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span> 71 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ </span>results = <span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._recognition_predictor( <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
248
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 72 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ │ </span>images=[image], <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
249
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 73 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ │ </span>langs=langs, <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"># Use the languages set during initialization</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
250
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 74 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ │ </span>det_predictor=<span style=\"color: #00ffff; text-decoration-color: #00ffff\">self</span>._detection_predictor, <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
251
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
252
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">╭───────────────────────────────────── locals ──────────────────────────────────────╮</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
253
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> detect_only = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">False</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
254
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> image = <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">PIL.Image.Image</span><span style=\"color: #000000; text-decoration-color: #000000\"> image </span><span style=\"color: #808000; text-decoration-color: #808000\">mode</span><span style=\"color: #000000; text-decoration-color: #000000\">=</span><span style=\"color: #800080; text-decoration-color: #800080\">RGB</span><span style=\"color: #000000; text-decoration-color: #000000\"> </span><span style=\"color: #808000; text-decoration-color: #808000\">size</span><span style=\"color: #000000; text-decoration-color: #000000\">=</span><span style=\"color: #800080; text-decoration-color: #800080\">1275x1651</span><span style=\"color: #000000; text-decoration-color: #000000\"> at </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">0x35512CCD0</span><span style=\"font-weight: bold\">&gt;</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
255
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> langs = <span style=\"font-weight: bold\">[[</span><span style=\"color: #808000; text-decoration-color: #808000\">'en'</span><span style=\"font-weight: bold\">]</span>, <span style=\"font-weight: bold\">[</span><span style=\"color: #808000; text-decoration-color: #808000\">'de'</span><span style=\"font-weight: bold\">]]</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
256
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> options = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
257
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> self = <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">natural_pdf.ocr.engine_surya.SuryaOCREngine</span><span style=\"color: #000000; text-decoration-color: #000000\"> object at </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">0x35512cfd0</span><span style=\"font-weight: bold\">&gt;</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
258
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">╰───────────────────────────────────────────────────────────────────────────────────╯</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
259
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
260
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #bfbf7f; text-decoration-color: #bfbf7f\">/Users/soma/.pyenv/versions/3.10.13/lib/python3.10/site-packages/surya/recognition/</span><span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">__init__.py</span>:<span style=\"color: #0000ff; text-decoration-color: #0000ff\">4</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
261
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #0000ff; text-decoration-color: #0000ff\">4</span> in <span style=\"color: #00ff00; text-decoration-color: #00ff00\">__call__</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
262
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
263
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 41 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ </span>polygons: List[List[List[List[<span style=\"color: #00ffff; text-decoration-color: #00ffff\">int</span>]]]] | <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span>, <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
264
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 42 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ </span>sort_lines: <span style=\"color: #00ffff; text-decoration-color: #00ffff\">bool</span> = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">True</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
265
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 43 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ </span>) -&gt; List[OCRResult]: <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
266
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">❱ </span> 44 <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">assert</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">len</span>(images) == <span style=\"color: #00ffff; text-decoration-color: #00ffff\">len</span>(langs), <span style=\"color: #808000; text-decoration-color: #808000\">\"You need to pass in one list of languages</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
267
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 45 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ </span>images = convert_if_not_rgb(images) <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
268
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 46 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">if</span> highres_images <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">is</span> <span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">not</span> <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span>: <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
269
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 47 </span><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">│ │ │ │ </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">assert</span> <span style=\"color: #00ffff; text-decoration-color: #00ffff\">len</span>(images) == <span style=\"color: #00ffff; text-decoration-color: #00ffff\">len</span>(highres_images), <span style=\"color: #808000; text-decoration-color: #808000\">\"You need to pass in one high</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
270
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
271
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">╭───────────────────────────────────────── locals ──────────────────────────────────────────╮</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
272
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> bboxes = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
273
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> det_predictor = <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">surya.detection.DetectionPredictor</span><span style=\"color: #000000; text-decoration-color: #000000\"> object at </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">0x3550b5390</span><span style=\"font-weight: bold\">&gt;</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
274
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> detection_batch_size = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
275
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> highres_images = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
276
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> images = <span style=\"font-weight: bold\">[&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">PIL.Image.Image</span><span style=\"color: #000000; text-decoration-color: #000000\"> image </span><span style=\"color: #808000; text-decoration-color: #808000\">mode</span><span style=\"color: #000000; text-decoration-color: #000000\">=</span><span style=\"color: #800080; text-decoration-color: #800080\">RGB</span><span style=\"color: #000000; text-decoration-color: #000000\"> </span><span style=\"color: #808000; text-decoration-color: #808000\">size</span><span style=\"color: #000000; text-decoration-color: #000000\">=</span><span style=\"color: #800080; text-decoration-color: #800080\">1275x1651</span><span style=\"color: #000000; text-decoration-color: #000000\"> at </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">0x35512CCD0</span><span style=\"font-weight: bold\">&gt;]</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
277
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> langs = <span style=\"font-weight: bold\">[[</span><span style=\"color: #808000; text-decoration-color: #808000\">'en'</span><span style=\"font-weight: bold\">]</span>, <span style=\"font-weight: bold\">[</span><span style=\"color: #808000; text-decoration-color: #808000\">'de'</span><span style=\"font-weight: bold\">]]</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
278
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> polygons = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
279
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> recognition_batch_size = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">None</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
280
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> self = <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">surya.recognition.RecognitionPredictor</span><span style=\"color: #000000; text-decoration-color: #000000\"> object at </span><span style=\"color: #0000ff; text-decoration-color: #0000ff\">0x35512c5b0</span><span style=\"font-weight: bold\">&gt;</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
281
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> sort_lines = <span style=\"color: #0000ff; text-decoration-color: #0000ff\">True</span> <span style=\"color: #808000; text-decoration-color: #808000\">│</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
282
+ "<span style=\"color: #800000; text-decoration-color: #800000\">│</span> <span style=\"color: #808000; text-decoration-color: #808000\">╰───────────────────────────────────────────────────────────────────────────────────────────╯</span> <span style=\"color: #800000; text-decoration-color: #800000\">│</span>\n",
283
+ "<span style=\"color: #800000; text-decoration-color: #800000\">╰──────────────────────────────────────────────────────────────────────────────────────────────────╯</span>\n",
284
+ "<span style=\"color: #ff0000; text-decoration-color: #ff0000; font-weight: bold\">AssertionError: </span>You need to pass in one list of languages for each image\n",
285
+ "</pre>\n"
286
+ ],
287
+ "text/plain": [
288
+ "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n",
289
+ "\u001b[31m│\u001b[0m \u001b[2;33m/Users/soma/Development/natural-pdf/natural_pdf/ocr/\u001b[0m\u001b[1;33mocr_manager.py\u001b[0m:\u001b[94m195\u001b[0m in \u001b[92mapply_ocr\u001b[0m \u001b[31m│\u001b[0m\n",
290
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
291
+ "\u001b[31m│\u001b[0m \u001b[2m192 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mlogger.debug(\u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33m[\u001b[0m\u001b[33m{\u001b[0mthread_id\u001b[33m}\u001b[0m\u001b[33m] Acquired inference lock for \u001b[0m\u001b[33m{\u001b[0mselected_engin \u001b[31m│\u001b[0m\n",
292
+ "\u001b[31m│\u001b[0m \u001b[2m193 \u001b[0m\u001b[2m│ │ │ │ \u001b[0minference_start_time = time.monotonic() \u001b[31m│\u001b[0m\n",
293
+ "\u001b[31m│\u001b[0m \u001b[2m194 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m \u001b[31m│\u001b[0m\n",
294
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m195 \u001b[2m│ │ │ │ \u001b[0mresults = engine_instance.process_image( \u001b[31m│\u001b[0m\n",
295
+ "\u001b[31m│\u001b[0m \u001b[2m196 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0mimages=images, \u001b[31m│\u001b[0m\n",
296
+ "\u001b[31m│\u001b[0m \u001b[2m197 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0mlanguages=languages, \u001b[31m│\u001b[0m\n",
297
+ "\u001b[31m│\u001b[0m \u001b[2m198 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0mmin_confidence=min_confidence, \u001b[31m│\u001b[0m\n",
298
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
299
+ "\u001b[31m│\u001b[0m \u001b[33m╭─\u001b[0m\u001b[33m──────────────────────────────────────────\u001b[0m\u001b[33m locals \u001b[0m\u001b[33m──────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m \u001b[31m│\u001b[0m\n",
300
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m detect_only = \u001b[94mFalse\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
301
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m device = \u001b[94mNone\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
302
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m e = \u001b[1;35mAssertionError\u001b[0m\u001b[1m(\u001b[0m\u001b[33m'You need to pass in one list of languages for each\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
303
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m \u001b[33mimage'\u001b[0m\u001b[1m)\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
304
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m engine = \u001b[33m'surya'\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
305
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m engine_instance = \u001b[1m<\u001b[0m\u001b[1;95mnatural_pdf.ocr.engine_surya.SuryaOCREngine\u001b[0m\u001b[39m object at \u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
306
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m \u001b[94m0x35512cfd0\u001b[0m\u001b[1m>\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
307
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m final_options = \u001b[94mNone\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
308
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m image_dims = \u001b[1m[\u001b[0m\u001b[33m'1275x1651'\u001b[0m\u001b[1m]\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
309
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m images = \u001b[1m[\u001b[0m\u001b[1m<\u001b[0m\u001b[1;95mPIL.Image.Image\u001b[0m\u001b[39m image \u001b[0m\u001b[33mmode\u001b[0m\u001b[39m=\u001b[0m\u001b[35mRGB\u001b[0m\u001b[39m \u001b[0m\u001b[33msize\u001b[0m\u001b[39m=\u001b[0m\u001b[35m1275x1651\u001b[0m\u001b[39m at \u001b[0m\u001b[94m0x35512CCD0\u001b[0m\u001b[1m>\u001b[0m\u001b[1m]\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
310
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m inference_acquired_time = \u001b[94m272326.616208958\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
311
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m inference_lock = \u001b[1m<\u001b[0m\u001b[1;95munlocked\u001b[0m\u001b[39m _thread.lock object at \u001b[0m\u001b[94m0x35511eec0\u001b[0m\u001b[1m>\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
312
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m inference_start_time = \u001b[94m272326.616209666\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
313
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m inference_wait_start = \u001b[94m272326.616208708\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
314
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m is_batch = \u001b[94mTrue\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
315
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m languages = \u001b[1m[\u001b[0m\u001b[33m'en'\u001b[0m, \u001b[33m'de'\u001b[0m\u001b[1m]\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
316
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m min_confidence = \u001b[94mNone\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
317
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m options = \u001b[94mNone\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
318
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m processing_mode = \u001b[33m'batch'\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
319
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m selected_engine_name = \u001b[33m'surya'\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
320
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m self = \u001b[1m<\u001b[0m\u001b[1;95mnatural_pdf.ocr.ocr_manager.OCRManager\u001b[0m\u001b[39m object at \u001b[0m\u001b[94m0x104beb580\u001b[0m\u001b[1m>\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
321
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m thread_id = \u001b[33m'MainThread'\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
322
+ "\u001b[31m│\u001b[0m \u001b[33m╰──────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[31m│\u001b[0m\n",
323
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
324
+ "\u001b[31m│\u001b[0m \u001b[2;33m/Users/soma/Development/natural-pdf/natural_pdf/ocr/\u001b[0m\u001b[1;33mengine.py\u001b[0m:\u001b[94m117\u001b[0m in \u001b[92mprocess_image\u001b[0m \u001b[31m│\u001b[0m\n",
325
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
326
+ "\u001b[31m│\u001b[0m \u001b[2m114 \u001b[0m\u001b[2m│ │ │ \u001b[0mprocessed_img = \u001b[96mself\u001b[0m._preprocess_image(img) \u001b[31m│\u001b[0m\n",
327
+ "\u001b[31m│\u001b[0m \u001b[2m115 \u001b[0m\u001b[2m│ │ │ \u001b[0m \u001b[31m│\u001b[0m\n",
328
+ "\u001b[31m│\u001b[0m \u001b[2m116 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[2m# Process the image with the engine-specific implementation\u001b[0m \u001b[31m│\u001b[0m\n",
329
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m117 \u001b[2m│ │ │ \u001b[0mraw_results = \u001b[96mself\u001b[0m._process_single_image(processed_img, detect_only, options \u001b[31m│\u001b[0m\n",
330
+ "\u001b[31m│\u001b[0m \u001b[2m118 \u001b[0m\u001b[2m│ │ │ \u001b[0m \u001b[31m│\u001b[0m\n",
331
+ "\u001b[31m│\u001b[0m \u001b[2m119 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[2m# Convert results to standardized format\u001b[0m \u001b[31m│\u001b[0m\n",
332
+ "\u001b[31m│\u001b[0m \u001b[2m120 \u001b[0m\u001b[2m│ │ │ \u001b[0mtext_regions = \u001b[96mself\u001b[0m._standardize_results(raw_results, effective_confidence, \u001b[31m│\u001b[0m\n",
333
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
334
+ "\u001b[31m│\u001b[0m \u001b[33m╭─\u001b[0m\u001b[33m─────────────────────────────────────────\u001b[0m\u001b[33m locals \u001b[0m\u001b[33m─────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m \u001b[31m│\u001b[0m\n",
335
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m detect_only = \u001b[94mFalse\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
336
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m device = \u001b[94mNone\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
337
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m effective_confidence = \u001b[94m0.2\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
338
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m effective_device = \u001b[33m'cpu'\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
339
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m effective_languages = \u001b[1m[\u001b[0m\u001b[33m'en'\u001b[0m, \u001b[33m'de'\u001b[0m\u001b[1m]\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
340
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m image_batch = \u001b[1m[\u001b[0m\u001b[1m<\u001b[0m\u001b[1;95mPIL.Image.Image\u001b[0m\u001b[39m image \u001b[0m\u001b[33mmode\u001b[0m\u001b[39m=\u001b[0m\u001b[35mRGB\u001b[0m\u001b[39m \u001b[0m\u001b[33msize\u001b[0m\u001b[39m=\u001b[0m\u001b[35m1275x1651\u001b[0m\u001b[39m at \u001b[0m\u001b[94m0x35512CCD0\u001b[0m\u001b[1m>\u001b[0m\u001b[1m]\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
341
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m images = \u001b[1m[\u001b[0m\u001b[1m<\u001b[0m\u001b[1;95mPIL.Image.Image\u001b[0m\u001b[39m image \u001b[0m\u001b[33mmode\u001b[0m\u001b[39m=\u001b[0m\u001b[35mRGB\u001b[0m\u001b[39m \u001b[0m\u001b[33msize\u001b[0m\u001b[39m=\u001b[0m\u001b[35m1275x1651\u001b[0m\u001b[39m at \u001b[0m\u001b[94m0x35512CCD0\u001b[0m\u001b[1m>\u001b[0m\u001b[1m]\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
342
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m img = \u001b[1m<\u001b[0m\u001b[1;95mPIL.Image.Image\u001b[0m\u001b[39m image \u001b[0m\u001b[33mmode\u001b[0m\u001b[39m=\u001b[0m\u001b[35mRGB\u001b[0m\u001b[39m \u001b[0m\u001b[33msize\u001b[0m\u001b[39m=\u001b[0m\u001b[35m1275x1651\u001b[0m\u001b[39m at \u001b[0m\u001b[94m0x35512CCD0\u001b[0m\u001b[1m>\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
343
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m languages = \u001b[1m[\u001b[0m\u001b[33m'en'\u001b[0m, \u001b[33m'de'\u001b[0m\u001b[1m]\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
344
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m min_confidence = \u001b[94mNone\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
345
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m options = \u001b[94mNone\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
346
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m processed_img = \u001b[1m<\u001b[0m\u001b[1;95mPIL.Image.Image\u001b[0m\u001b[39m image \u001b[0m\u001b[33mmode\u001b[0m\u001b[39m=\u001b[0m\u001b[35mRGB\u001b[0m\u001b[39m \u001b[0m\u001b[33msize\u001b[0m\u001b[39m=\u001b[0m\u001b[35m1275x1651\u001b[0m\u001b[39m at \u001b[0m\u001b[94m0x35512CCD0\u001b[0m\u001b[1m>\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
347
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m results = \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
348
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m self = \u001b[1m<\u001b[0m\u001b[1;95mnatural_pdf.ocr.engine_surya.SuryaOCREngine\u001b[0m\u001b[39m object at \u001b[0m\u001b[94m0x35512cfd0\u001b[0m\u001b[1m>\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
349
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m single_image = \u001b[94mFalse\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
350
+ "\u001b[31m│\u001b[0m \u001b[33m╰────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[31m│\u001b[0m\n",
351
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
352
+ "\u001b[31m│\u001b[0m \u001b[2;33m/Users/soma/Development/natural-pdf/natural_pdf/ocr/\u001b[0m\u001b[1;33mengine_surya.py\u001b[0m:\u001b[94m71\u001b[0m in \u001b[92m_process_single_image\u001b[0m \u001b[31m│\u001b[0m\n",
353
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
354
+ "\u001b[31m│\u001b[0m \u001b[2m 68 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m detect_only: \u001b[31m│\u001b[0m\n",
355
+ "\u001b[31m│\u001b[0m \u001b[2m 69 \u001b[0m\u001b[2m│ │ │ \u001b[0mresults = \u001b[96mself\u001b[0m._detection_predictor(images=[image]) \u001b[31m│\u001b[0m\n",
356
+ "\u001b[31m│\u001b[0m \u001b[2m 70 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n",
357
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 71 \u001b[2m│ │ │ \u001b[0mresults = \u001b[96mself\u001b[0m._recognition_predictor( \u001b[31m│\u001b[0m\n",
358
+ "\u001b[31m│\u001b[0m \u001b[2m 72 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mimages=[image], \u001b[31m│\u001b[0m\n",
359
+ "\u001b[31m│\u001b[0m \u001b[2m 73 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mlangs=langs, \u001b[2m# Use the languages set during initialization\u001b[0m \u001b[31m│\u001b[0m\n",
360
+ "\u001b[31m│\u001b[0m \u001b[2m 74 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mdet_predictor=\u001b[96mself\u001b[0m._detection_predictor, \u001b[31m│\u001b[0m\n",
361
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
362
+ "\u001b[31m│\u001b[0m \u001b[33m╭─\u001b[0m\u001b[33m────────────────────────────────────\u001b[0m\u001b[33m locals \u001b[0m\u001b[33m─────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m \u001b[31m│\u001b[0m\n",
363
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m detect_only = \u001b[94mFalse\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
364
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m image = \u001b[1m<\u001b[0m\u001b[1;95mPIL.Image.Image\u001b[0m\u001b[39m image \u001b[0m\u001b[33mmode\u001b[0m\u001b[39m=\u001b[0m\u001b[35mRGB\u001b[0m\u001b[39m \u001b[0m\u001b[33msize\u001b[0m\u001b[39m=\u001b[0m\u001b[35m1275x1651\u001b[0m\u001b[39m at \u001b[0m\u001b[94m0x35512CCD0\u001b[0m\u001b[1m>\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
365
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m langs = \u001b[1m[\u001b[0m\u001b[1m[\u001b[0m\u001b[33m'en'\u001b[0m\u001b[1m]\u001b[0m, \u001b[1m[\u001b[0m\u001b[33m'de'\u001b[0m\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
366
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m options = \u001b[94mNone\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
367
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m self = \u001b[1m<\u001b[0m\u001b[1;95mnatural_pdf.ocr.engine_surya.SuryaOCREngine\u001b[0m\u001b[39m object at \u001b[0m\u001b[94m0x35512cfd0\u001b[0m\u001b[1m>\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
368
+ "\u001b[31m│\u001b[0m \u001b[33m╰───────────────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[31m│\u001b[0m\n",
369
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
370
+ "\u001b[31m│\u001b[0m \u001b[2;33m/Users/soma/.pyenv/versions/3.10.13/lib/python3.10/site-packages/surya/recognition/\u001b[0m\u001b[1;33m__init__.py\u001b[0m:\u001b[94m4\u001b[0m \u001b[31m│\u001b[0m\n",
371
+ "\u001b[31m│\u001b[0m \u001b[94m4\u001b[0m in \u001b[92m__call__\u001b[0m \u001b[31m│\u001b[0m\n",
372
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
373
+ "\u001b[31m│\u001b[0m \u001b[2m 41 \u001b[0m\u001b[2m│ │ │ \u001b[0mpolygons: List[List[List[List[\u001b[96mint\u001b[0m]]]] | \u001b[94mNone\u001b[0m = \u001b[94mNone\u001b[0m, \u001b[31m│\u001b[0m\n",
374
+ "\u001b[31m│\u001b[0m \u001b[2m 42 \u001b[0m\u001b[2m│ │ │ \u001b[0msort_lines: \u001b[96mbool\u001b[0m = \u001b[94mTrue\u001b[0m \u001b[31m│\u001b[0m\n",
375
+ "\u001b[31m│\u001b[0m \u001b[2m 43 \u001b[0m\u001b[2m│ \u001b[0m) -> List[OCRResult]: \u001b[31m│\u001b[0m\n",
376
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 44 \u001b[2m│ │ │ \u001b[0m\u001b[94massert\u001b[0m \u001b[96mlen\u001b[0m(images) == \u001b[96mlen\u001b[0m(langs), \u001b[33m\"\u001b[0m\u001b[33mYou need to pass in one list of languages\u001b[0m \u001b[31m│\u001b[0m\n",
377
+ "\u001b[31m│\u001b[0m \u001b[2m 45 \u001b[0m\u001b[2m│ │ │ \u001b[0mimages = convert_if_not_rgb(images) \u001b[31m│\u001b[0m\n",
378
+ "\u001b[31m│\u001b[0m \u001b[2m 46 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m highres_images \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31m│\u001b[0m\n",
379
+ "\u001b[31m│\u001b[0m \u001b[2m 47 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94massert\u001b[0m \u001b[96mlen\u001b[0m(images) == \u001b[96mlen\u001b[0m(highres_images), \u001b[33m\"\u001b[0m\u001b[33mYou need to pass in one high\u001b[0m \u001b[31m│\u001b[0m\n",
380
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
381
+ "\u001b[31m│\u001b[0m \u001b[33m╭─\u001b[0m\u001b[33m────────────────────────────────────────\u001b[0m\u001b[33m locals \u001b[0m\u001b[33m─────────────────────────────────────────\u001b[0m\u001b[33m─╮\u001b[0m \u001b[31m│\u001b[0m\n",
382
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m bboxes = \u001b[94mNone\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
383
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m det_predictor = \u001b[1m<\u001b[0m\u001b[1;95msurya.detection.DetectionPredictor\u001b[0m\u001b[39m object at \u001b[0m\u001b[94m0x3550b5390\u001b[0m\u001b[1m>\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
384
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m detection_batch_size = \u001b[94mNone\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
385
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m highres_images = \u001b[94mNone\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
386
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m images = \u001b[1m[\u001b[0m\u001b[1m<\u001b[0m\u001b[1;95mPIL.Image.Image\u001b[0m\u001b[39m image \u001b[0m\u001b[33mmode\u001b[0m\u001b[39m=\u001b[0m\u001b[35mRGB\u001b[0m\u001b[39m \u001b[0m\u001b[33msize\u001b[0m\u001b[39m=\u001b[0m\u001b[35m1275x1651\u001b[0m\u001b[39m at \u001b[0m\u001b[94m0x35512CCD0\u001b[0m\u001b[1m>\u001b[0m\u001b[1m]\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
387
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m langs = \u001b[1m[\u001b[0m\u001b[1m[\u001b[0m\u001b[33m'en'\u001b[0m\u001b[1m]\u001b[0m, \u001b[1m[\u001b[0m\u001b[33m'de'\u001b[0m\u001b[1m]\u001b[0m\u001b[1m]\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
388
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m polygons = \u001b[94mNone\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
389
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m recognition_batch_size = \u001b[94mNone\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
390
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m self = \u001b[1m<\u001b[0m\u001b[1;95msurya.recognition.RecognitionPredictor\u001b[0m\u001b[39m object at \u001b[0m\u001b[94m0x35512c5b0\u001b[0m\u001b[1m>\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
391
+ "\u001b[31m│\u001b[0m \u001b[33m│\u001b[0m sort_lines = \u001b[94mTrue\u001b[0m \u001b[33m│\u001b[0m \u001b[31m│\u001b[0m\n",
392
+ "\u001b[31m│\u001b[0m \u001b[33m╰───────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[31m│\u001b[0m\n",
393
+ "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n",
394
+ "\u001b[1;91mAssertionError: \u001b[0mYou need to pass in one list of languages for each image\n"
395
+ ]
396
+ },
397
+ "metadata": {},
398
+ "output_type": "display_data"
399
+ },
249
400
  {
250
401
  "name": "stderr",
251
402
  "output_type": "stream",
252
403
  "text": [
253
- "\u001b[2m2025-04-21T21:32:28.446834Z\u001b[0m [\u001b[33m\u001b[1mwarning \u001b[0m] \u001b[1mGOOGLE_API_KEY environment variable not set. Gemini detector (via OpenAI lib) will not be available.\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m72\u001b[0m \u001b[36mmodule\u001b[0m=\u001b[35mnatural_pdf.analyzers.layout.gemini\u001b[0m\n"
404
+ "\u001b[2m2025-04-27T16:34:08.337905Z\u001b[0m [\u001b[31m\u001b[1merror \u001b[0m] \u001b[1mAn unexpected error occurred during OCR processing: You need to pass in one list of languages for each image\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m236\u001b[0m \u001b[36mmodule\u001b[0m=\u001b[35mnatural_pdf.ocr.ocr_manager\u001b[0m\n",
405
+ "\n"
254
406
  ]
255
407
  },
256
408
  {
257
409
  "name": "stderr",
258
410
  "output_type": "stream",
259
411
  "text": [
260
- "[2025-04-21 17:32:28,446] [ WARNING] gemini.py:72 - GOOGLE_API_KEY environment variable not set. Gemini detector (via OpenAI lib) will not be available.\n"
412
+ "[2025-04-27 12:34:08,337] [ ERROR] ocr_manager.py:236 - An unexpected error occurred during OCR processing: You need to pass in one list of languages for each image\n",
413
+ "Traceback (most recent call last):\n",
414
+ " File \"/Users/soma/Development/natural-pdf/natural_pdf/ocr/ocr_manager.py\", line 195, in apply_ocr\n",
415
+ " results = engine_instance.process_image(\n",
416
+ " File \"/Users/soma/Development/natural-pdf/natural_pdf/ocr/engine.py\", line 117, in process_image\n",
417
+ " raw_results = self._process_single_image(processed_img, detect_only, options)\n",
418
+ " File \"/Users/soma/Development/natural-pdf/natural_pdf/ocr/engine_surya.py\", line 71, in _process_single_image\n",
419
+ " results = self._recognition_predictor(\n",
420
+ " File \"/Users/soma/.pyenv/versions/3.10.13/lib/python3.10/site-packages/surya/recognition/__init__.py\", line 44, in __call__\n",
421
+ " assert len(images) == len(langs), \"You need to pass in one list of languages for each image\"\n",
422
+ "AssertionError: You need to pass in one list of languages for each image\n"
261
423
  ]
262
424
  },
263
425
  {
264
- "name": "stdout",
426
+ "name": "stderr",
265
427
  "output_type": "stream",
266
428
  "text": [
267
- "\n"
429
+ "\u001b[2m2025-04-27T16:34:08.409507Z\u001b[0m [\u001b[31m\u001b[1merror \u001b[0m] \u001b[1mBatch OCR processing failed: You need to pass in one list of languages for each image\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m366\u001b[0m \u001b[36mmodule\u001b[0m=\u001b[35mnatural_pdf.core.pdf\u001b[0m\n"
268
430
  ]
269
431
  },
270
432
  {
271
- "name": "stdout",
433
+ "name": "stderr",
272
434
  "output_type": "stream",
273
435
  "text": [
274
- "image 1/1 /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmpjbbxsx1v/temp_layout_image.png: 1024x800 2 titles, 2 plain texts, 3 abandons, 1 table, 1940.4ms\n"
436
+ "[2025-04-27 12:34:08,409] [ ERROR] pdf.py:366 - Batch OCR processing failed: You need to pass in one list of languages for each image\n"
275
437
  ]
276
438
  },
277
439
  {
278
440
  "name": "stdout",
279
441
  "output_type": "stream",
280
442
  "text": [
281
- "Speed: 5.4ms preprocess, 1940.4ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 800)\n"
443
+ "\n",
444
+ "Extracted text after OCR:\n",
445
+ "...\n"
282
446
  ]
283
447
  }
284
448
  ],
285
449
  "source": [
286
- "# Enable OCR and analyze the document layout\n",
287
- "page.use_ocr = True\n",
288
- "page.analyze_layout()\n",
289
- "\n",
290
- "# Find table regions\n",
291
- "table_regions = page.find_all('region[type=table]')\n",
292
- "\n",
293
- "# Visualize any detected tables\n",
294
- "table_regions.highlight()\n",
295
- "\n",
296
- "# Extract the first table if found\n",
297
- "if table_regions:\n",
298
- " table_data = table_regions[0].extract_table()\n",
299
- " table_data\n",
300
- "else:\n",
301
- " \"No tables found in the document\""
450
+ "# Apply OCR using the default engine (EasyOCR) for English\n",
451
+ "page.apply_ocr(languages=['en'])\n",
452
+ "\n",
453
+ "# Select all text pieces found by OCR\n",
454
+ "text_elements = page.find_all('text[source=ocr]')\n",
455
+ "print(f\"Found {len(text_elements)} text elements using default OCR\")\n",
456
+ "\n",
457
+ "# Visualize the elements\n",
458
+ "text_elements.highlight()\n",
459
+ "\n",
460
+ "# Apply OCR using PaddleOCR for English and Chinese\n",
461
+ "page.apply_ocr(engine='paddle', languages=['en', 'ch_sim'])\n",
462
+ "\n",
463
+ "# Apply OCR using SuryaOCR for English and German\n",
464
+ "page.apply_ocr(engine='surya', languages=['en', 'de'])\n",
465
+ "\n",
466
+ "text_with_ocr = page.extract_text()\n",
467
+ "print(f\"\\nExtracted text after OCR:\\n{text_with_ocr[:150]}...\")"
302
468
  ]
303
469
  },
304
470
  {
305
471
  "cell_type": "markdown",
306
- "id": "6a3c701e",
472
+ "id": "95099bf7",
307
473
  "metadata": {},
308
474
  "source": [
309
- "## Finding Form Fields in Scanned Documents"
475
+ "## Advanced OCR Configuration\n",
476
+ "\n",
477
+ "For more control, import and use the specific `Options` class for your chosen engine within the `apply_ocr` call."
310
478
  ]
311
479
  },
312
480
  {
313
481
  "cell_type": "code",
314
- "execution_count": 7,
315
- "id": "7180badd",
482
+ "execution_count": 4,
483
+ "id": "d2808068",
316
484
  "metadata": {
317
485
  "execution": {
318
- "iopub.execute_input": "2025-04-21T21:32:30.755960Z",
319
- "iopub.status.busy": "2025-04-21T21:32:30.755766Z",
320
- "iopub.status.idle": "2025-04-21T21:32:30.762760Z",
321
- "shell.execute_reply": "2025-04-21T21:32:30.762434Z"
486
+ "iopub.execute_input": "2025-04-27T16:34:08.412785Z",
487
+ "iopub.status.busy": "2025-04-27T16:34:08.412688Z",
488
+ "iopub.status.idle": "2025-04-27T16:34:26.977343Z",
489
+ "shell.execute_reply": "2025-04-27T16:34:26.977013Z"
322
490
  }
323
491
  },
324
492
  "outputs": [
325
493
  {
326
494
  "data": {
495
+ "application/vnd.jupyter.widget-view+json": {
496
+ "model_id": "2604f98986f34efab6df0e7dbf9ed4f5",
497
+ "version_major": 2,
498
+ "version_minor": 0
499
+ },
327
500
  "text/plain": [
328
- "{\"Site: Durham's Meatpacking Chicago, IIl.\": 'Jungle Health and Satety Inspection Service\\nINS-UPONSINCLAIR \\n \\n \\n \\n \\n \\nSummary: Worst of any, however; were the fertilizer men, and those who served in the cooking rooms\\nThese people could not be shown to the visitor for the odor of a fertilizer man would scare any\\nvisitor at a hundred yards, and as for the other men, who worked in tank rooms full of steam, and in\\nsome of which there were open vats near the level of the floor; their peculiar trouble was that they fell\\ninlo the vats; and when they were fished out; there was never enough of them left to be worth\\nwould be overlooked for days, till all but the bones of them had gone out\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\nLevel \\nUnsanitary Working Conditions Critical\\nInadequate Protective Equipment: Serious\\n \\nSerious \\nFailure to Properly Storc Hazardous Materials_ Critical\\nSafety Measures_ Serious \\nInadequate Ventilation Systems Serious\\n \\nInsufficient Employee Training for Safe Work Practices Serious\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\nJungle Health and Salety Irspection Service',\n",
329
- " 'Date: February 3, 1905': \"Jungle Health and Satety Inspection Service\\n INS-UPONSINCLAIR \\n \\nSite: Durham's Meatpacking Chicago, IIl.\\n \\n \\n \\nSummary: Worst of any, however; were the fertilizer men, and those who served in the cooking rooms\\nThese people could not be shown to the visitor for the odor of a fertilizer man would scare any\\nvisitor at a hundred yards, and as for the other men, who worked in tank rooms full of steam, and in\\nsome of which there were open vats near the level of the floor; their peculiar trouble was that they fell\\ninlo the vats; and when they were fished out; there was never enough of them left to be worth\\ntheywould be overlooked for days, till all but the bones of them had gone out\\nto thc world as Durham's Purc Lcaf Lard!\\n \\n \\n \\n \\n \\n \\n \\n \\n \\nDescription \\n \\nUnsanitary Working Conditions\\nInadequate Protective Equipment:\\nIneffective Injury Prevention _\\n \\nFailure to Properly Storc Hazardous Materials_\\nLack of AdequateFireSafety Measures_\\nInadequate Ventilation Systems\\n \\nInsufficient Employee Training for Safe Work Practices\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\nJungle Health and Salety Irspection Service\",\n",
330
- " 'Violation Count': \"Site: Durham's Meatpacking Chicago, IIl.\\nDate: February 3, 1905 \\n \\n \\nSummary: Worst of any, however; were the fertilizer men, and those who served in the cooking rooms\\nThese people could not be shown to the visitor for the odor of a fertilizer man would scare any\\nvisitor at a hundred yards, and as for the other men, who worked in tank rooms full of steam, and in\\nsome of which there were open vats near the level of the floor; their peculiar trouble was that they fell\\ninlo the vats; and when they were fished out; there was never enough of them left to be worth\\nsometimestheywould be overlooked for days, till all but the bones of them had gone out\\nto thc world as Durham's Purc Lcaf Lard!\\n \\n \\n \\n \\n \\n \\n \\n \\n \\nDescription \\n \\nUnsanitary Working Conditions\\nInadequate Protective Equipment:\\nIneffective Injury Prevention _\\n \\nFailure to Properly Storc Hazardous Materials_\\nLack of AdequateFireSafety Measures_\\nInadequate Ventilation Systems\\n \\nInsufficient Employee Training for Safe Work Practices\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\nJungle Health and Salety Irspection Service\",\n",
331
- " 'Summary: Worst of any, however; were the fertilizer men, and those who served in the cooking rooms': 'Red (ZGB tuple] \\n \\nJungle Health and Satety Inspection Service\\n \\n \\n \\n \\n \\n \\n \\nordinary \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\nRepeat?',\n",
332
- " 'Inadequate Protective Equipment': 'Jungle Health and Satety Inspection Service\\nINS-UPONSINCLAIR \\n \\n \\n \\n \\n \\nSummary: Worst of any, however; were the fertilizer men, and those who served in the cooking rooms\\nThese people could not be shown to the visitor for the odor of a fertilizer man would scare anyordinary\\nvisitor at a hundred yards, and as for the other men, who worked in tank rooms full of steam, and in\\nsome of which there were open vats near the level of the floor; their peculiar trouble was that they fell\\ninlo the vats; and when they were fished out; there was never enough of them left to be worth\\nwould be overlooked for days, till all but the bones of them had gone out\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\nLevel \\nCritical \\nSerious \\n \\nSerious \\nFailure to Properly Storc Hazardous Materials_ Critical\\nSafety Measures_ Serious \\nSerious \\n \\nInsufficient Employee Training for Safe Work Practices Serious\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\nJungle Health and Salety Irspection Service'}"
501
+ "Rendering pages: 0%| | 0/1 [00:00<?, ?it/s]"
333
502
  ]
334
503
  },
335
- "execution_count": 7,
336
504
  "metadata": {},
337
- "output_type": "execute_result"
338
- }
339
- ],
340
- "source": [
341
- "# Look for potential form labels (containing a colon)\n",
342
- "labels = page.find_all('text:contains(\":\")') \n",
343
- "\n",
344
- "# Visualize the labels\n",
345
- "labels.highlight()\n",
346
- "\n",
347
- "# Extract form data by looking to the right of each label\n",
348
- "form_data = {}\n",
349
- "for label in labels:\n",
350
- " # Clean the label text\n",
351
- " field_name = label.text.strip().rstrip(':')\n",
352
- " \n",
353
- " # Find the value to the right\n",
354
- " value_element = label.right(width=200)\n",
355
- " value = value_element.extract_text().strip()\n",
356
- " \n",
357
- " # Add to our dictionary\n",
358
- " form_data[field_name] = value\n",
359
- "\n",
360
- "# Display the extracted data\n",
361
- "form_data"
362
- ]
363
- },
364
- {
365
- "cell_type": "markdown",
366
- "id": "5495e93c",
367
- "metadata": {},
368
- "source": [
369
- "## Combining OCR with Layout Analysis"
370
- ]
371
- },
372
- {
373
- "cell_type": "code",
374
- "execution_count": 8,
375
- "id": "20b489df",
376
- "metadata": {
377
- "execution": {
378
- "iopub.execute_input": "2025-04-21T21:32:30.764203Z",
379
- "iopub.status.busy": "2025-04-21T21:32:30.764045Z",
380
- "iopub.status.idle": "2025-04-21T21:32:32.790129Z",
381
- "shell.execute_reply": "2025-04-21T21:32:32.789771Z"
382
- }
383
- },
384
- "outputs": [
385
- {
386
- "name": "stderr",
387
- "output_type": "stream",
388
- "text": [
389
- "\u001b[2m2025-04-21T21:32:30.782293Z\u001b[0m [\u001b[33m\u001b[1mwarning \u001b[0m] \u001b[1mGOOGLE_API_KEY environment variable not set. Gemini detector (via OpenAI lib) will not be available.\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m72\u001b[0m \u001b[36mmodule\u001b[0m=\u001b[35mnatural_pdf.analyzers.layout.gemini\u001b[0m\n"
390
- ]
505
+ "output_type": "display_data"
391
506
  },
392
507
  {
393
- "name": "stderr",
394
- "output_type": "stream",
395
- "text": [
396
- "[2025-04-21 17:32:30,782] [ WARNING] gemini.py:72 - GOOGLE_API_KEY environment variable not set. Gemini detector (via OpenAI lib) will not be available.\n"
397
- ]
508
+ "data": {
509
+ "application/vnd.jupyter.widget-view+json": {
510
+ "model_id": "eec78714154b4bf6aca4b7ccb3c157d1",
511
+ "version_major": 2,
512
+ "version_minor": 0
513
+ },
514
+ "text/plain": [
515
+ "Rendering pages: 0%| | 0/1 [00:00<?, ?it/s]"
516
+ ]
517
+ },
518
+ "metadata": {},
519
+ "output_type": "display_data"
398
520
  },
399
521
  {
400
- "name": "stderr",
401
- "output_type": "stream",
402
- "text": [
403
- "\u001b[2m2025-04-21T21:32:30.783192Z\u001b[0m [\u001b[33m\u001b[1mwarning \u001b[0m] \u001b[1mGOOGLE_API_KEY environment variable not set. Gemini detector (via OpenAI lib) will not be available.\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m72\u001b[0m \u001b[36mmodule\u001b[0m=\u001b[35mnatural_pdf.analyzers.layout.gemini\u001b[0m\n"
404
- ]
522
+ "data": {
523
+ "application/vnd.jupyter.widget-view+json": {
524
+ "model_id": "ea2ea03faf7c431eaed13f8e9de9b088",
525
+ "version_major": 2,
526
+ "version_minor": 0
527
+ },
528
+ "text/plain": [
529
+ "Rendering pages: 0%| | 0/1 [00:00<?, ?it/s]"
530
+ ]
531
+ },
532
+ "metadata": {},
533
+ "output_type": "display_data"
405
534
  },
406
535
  {
407
536
  "name": "stderr",
408
537
  "output_type": "stream",
409
538
  "text": [
410
- "[2025-04-21 17:32:30,783] [ WARNING] gemini.py:72 - GOOGLE_API_KEY environment variable not set. Gemini detector (via OpenAI lib) will not be available.\n"
539
+ "\r",
540
+ "Detecting bboxes: 0%| | 0/1 [00:00<?, ?it/s]"
411
541
  ]
412
542
  },
413
543
  {
414
- "name": "stdout",
544
+ "name": "stderr",
415
545
  "output_type": "stream",
416
546
  "text": [
417
- "\n"
547
+ "\r",
548
+ "Detecting bboxes: 100%|███████████████████████████████████████████████████| 1/1 [00:00<00:00, 1.74it/s]"
418
549
  ]
419
550
  },
420
551
  {
421
- "name": "stdout",
552
+ "name": "stderr",
422
553
  "output_type": "stream",
423
554
  "text": [
424
- "image 1/1 /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmprtsl29ey/temp_layout_image.png: 1024x800 2 titles, 2 plain texts, 3 abandons, 1 table, 1925.6ms\n"
555
+ "\r",
556
+ "Detecting bboxes: 100%|███████████████████████████████████████████████████| 1/1 [00:00<00:00, 1.73it/s]"
425
557
  ]
426
558
  },
427
559
  {
428
- "name": "stdout",
560
+ "name": "stderr",
429
561
  "output_type": "stream",
430
562
  "text": [
431
- "Speed: 4.7ms preprocess, 1925.6ms inference, 1.2ms postprocess per image at shape (1, 3, 1024, 800)\n"
563
+ "\n"
432
564
  ]
433
565
  },
434
566
  {
435
567
  "data": {
436
568
  "text/plain": [
437
- "[]"
569
+ "<Page number=1 index=0>"
438
570
  ]
439
571
  },
440
- "execution_count": 8,
572
+ "execution_count": 4,
441
573
  "metadata": {},
442
574
  "output_type": "execute_result"
443
575
  }
444
576
  ],
445
577
  "source": [
446
- "# Apply OCR and analyze layout\n",
447
- "page.use_ocr = True\n",
448
- "page.analyze_layout()\n",
578
+ "from natural_pdf.ocr import PaddleOCROptions, EasyOCROptions, SuryaOCROptions\n",
579
+ "\n",
580
+ "# Re-apply OCR using EasyOCR with specific options\n",
581
+ "easy_opts = EasyOCROptions(\n",
582
+ " paragraph=False,\n",
583
+ ")\n",
584
+ "page.apply_ocr(engine='easyocr', languages=['en'], min_confidence=0.1, options=easy_opts)\n",
449
585
  "\n",
450
- "# Find document structure elements\n",
451
- "headings = page.find_all('region[type=heading]')\n",
452
- "paragraphs = page.find_all('region[type=paragraph]')\n",
586
+ "paddle_opts = PaddleOCROptions(\n",
587
+ " use_angle_cls=False,\n",
588
+ " det_db_thresh=0.3,\n",
589
+ ")\n",
590
+ "page.apply_ocr(engine='paddle', languages=['en'], options=paddle_opts)\n",
591
+ "\n",
592
+ "surya_opts = SuryaOCROptions()\n",
593
+ "page.apply_ocr(engine='surya', languages=['en'], min_confidence=0.5, detect_only=True, options=surya_opts)"
594
+ ]
595
+ },
596
+ {
597
+ "cell_type": "markdown",
598
+ "id": "18499b9e",
599
+ "metadata": {},
600
+ "source": [
601
+ "## Interactive OCR Correction / Debugging\n",
453
602
  "\n",
454
- "# Visualize the structure\n",
455
- "headings.highlight(color=\"red\", label=\"Headings\")\n",
456
- "paragraphs.highlight(color=\"blue\", label=\"Paragraphs\")\n",
603
+ "If OCR results aren't perfect, you can use the bundled interactive web application (SPA) to review and correct them.\n",
457
604
  "\n",
458
- "# Create a simple document outline\n",
459
- "document_outline = []\n",
460
- "for heading in headings:\n",
461
- " heading_text = heading.extract_text()\n",
462
- " document_outline.append(heading_text)\n",
605
+ "1. **Package the data:**\n",
606
+ " After running `apply_ocr` (or `apply_layout`), use `create_correction_task_package` to create a zip file containing the PDF images and detected elements.\n",
463
607
  "\n",
464
- "document_outline"
608
+ " ```python\n",
609
+ " from natural_pdf.utils.packaging import create_correction_task_package\n",
610
+ "\n",
611
+ " page.apply_ocr()\n",
612
+ "\n",
613
+ " create_correction_task_package(pdf, \"correction_package.zip\", overwrite=True)\n",
614
+ " ```\n",
615
+ "\n",
616
+ "2. **Run the SPA:**\n",
617
+ " In your terminal, navigate to the SPA directory inside the installed `natural_pdf` package and start a simple web server on port 8000 (for example, `python -m http.server 8000`).\n",
618
+ "\n",
619
+ "3. **Use the SPA:**\n",
620
+ " Open `http://localhost:8000` in your browser. Drag the `correction_package.zip` file onto the page to load the document. You can then click on text elements to correct the OCR results."
465
621
  ]
466
622
  },
467
623
  {
468
624
  "cell_type": "markdown",
469
- "id": "320bdfc4",
625
+ "id": "b2ec255f",
470
626
  "metadata": {},
471
627
  "source": [
472
- "## Working with Multiple Pages"
628
+ "## Working with Multiple Pages\n",
629
+ "\n",
630
+ "Apply OCR or layout analysis to all pages using the `PDF` object."
473
631
  ]
474
632
  },
475
633
  {
476
634
  "cell_type": "code",
477
- "execution_count": 9,
478
- "id": "9421a04d",
635
+ "execution_count": 5,
636
+ "id": "5d6b1ed1",
479
637
  "metadata": {
480
638
  "execution": {
481
- "iopub.execute_input": "2025-04-21T21:32:32.791525Z",
482
- "iopub.status.busy": "2025-04-21T21:32:32.791398Z",
483
- "iopub.status.idle": "2025-04-21T21:32:32.796295Z",
484
- "shell.execute_reply": "2025-04-21T21:32:32.795973Z"
639
+ "iopub.execute_input": "2025-04-27T16:34:26.978820Z",
640
+ "iopub.status.busy": "2025-04-27T16:34:26.978712Z",
641
+ "iopub.status.idle": "2025-04-27T16:34:36.843139Z",
642
+ "shell.execute_reply": "2025-04-27T16:34:36.842881Z"
485
643
  }
486
644
  },
487
645
  "outputs": [
488
646
  {
489
647
  "data": {
648
+ "application/vnd.jupyter.widget-view+json": {
649
+ "model_id": "eee280b518cb4bd7b9ee15666753bb55",
650
+ "version_major": 2,
651
+ "version_minor": 0
652
+ },
490
653
  "text/plain": [
491
- "['Page 1: \\n ...']"
654
+ "Rendering pages: 0%| | 0/1 [00:00<?, ?it/s]"
492
655
  ]
493
656
  },
494
- "execution_count": 9,
495
657
  "metadata": {},
496
- "output_type": "execute_result"
658
+ "output_type": "display_data"
659
+ },
660
+ {
661
+ "name": "stderr",
662
+ "output_type": "stream",
663
+ "text": [
664
+ "\u001b[2m2025-04-27T16:34:36.840807Z\u001b[0m [\u001b[33m\u001b[1mwarning \u001b[0m] \u001b[1mIgnoring unsupported layout keyword argument: 'page_separator'\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m57\u001b[0m \u001b[36mmodule\u001b[0m=\u001b[35mnatural_pdf.utils.text_extraction\u001b[0m\n"
665
+ ]
666
+ },
667
+ {
668
+ "name": "stderr",
669
+ "output_type": "stream",
670
+ "text": [
671
+ "[2025-04-27 12:34:36,840] [ WARNING] text_extraction.py:57 - Ignoring unsupported layout keyword argument: 'page_separator'\n"
672
+ ]
673
+ },
674
+ {
675
+ "name": "stdout",
676
+ "output_type": "stream",
677
+ "text": [
678
+ "Applied OCR to 1 pages.\n",
679
+ "\n",
680
+ "Combined text from all pages:\n",
681
+ "Red (ZGB tuple]\n",
682
+ "Jungle Health and Satety Inspection Service\n",
683
+ "INS-UPONSINCLAIR\n",
684
+ "Site: Durham's Meatpacking Chicago, IIl.\n",
685
+ "Date: February 3, 1905\n",
686
+ "Violation Count:\n",
687
+ "Summary: Worst of any, however; were the fertilizer men, and those who served in the cooking rooms\n",
688
+ "These people could not be shown to the visitorfor the odor of a fertilizer man would scare anyordinary\n",
689
+ "visitor at a hundred yards, and as for the other men, who worked in tank rooms full of steam, and in\n",
690
+ "some of which there were open vats near...\n"
691
+ ]
497
692
  }
498
693
  ],
499
694
  "source": [
500
695
  "# Process all pages in the document\n",
501
- "all_text = []\n",
502
- "\n",
503
- "for i, page in enumerate(pdf.pages):\n",
504
- " # Enable OCR for each page\n",
505
- " page.use_ocr = True\n",
506
- " \n",
507
- " # Extract text\n",
508
- " page_text = page.extract_text()\n",
509
- " \n",
510
- " # Add to our collection with page number\n",
511
- " all_text.append(f\"Page {i+1}: {page_text[:100]}...\")\n",
512
- "\n",
513
- "# Show the first few pages\n",
514
- "all_text"
696
+ "\n",
697
+ "# Apply OCR to all pages (example using EasyOCR)\n",
698
+ "pdf.apply_ocr(engine='easyocr', languages=['en'])\n",
699
+ "print(f\"Applied OCR to {len(pdf.pages)} pages.\")\n",
700
+ "\n",
701
+ "# Or apply layout analysis to all pages (example using Paddle)\n",
702
+ "# pdf.apply_layout(engine='paddle')\n",
703
+ "# print(f\"Applied Layout Analysis to {len(pdf.pages)} pages.\")\n",
704
+ "\n",
705
+ "# Extract text from all pages (uses OCR results if available)\n",
706
+ "all_text_content = pdf.extract_text(page_separator=\"\\n\\n---\\n\\n\")\n",
707
+ "\n",
708
+ "print(f\"\\nCombined text from all pages:\\n{all_text_content[:500]}...\")"
515
709
  ]
516
710
  },
517
711
  {
518
712
  "cell_type": "markdown",
519
- "id": "d69c14d1",
713
+ "id": "7d91a33b",
520
714
  "metadata": {},
521
715
  "source": [
522
716
  "## Saving PDFs with Searchable Text\n",
@@ -528,29 +722,50 @@
528
722
  },
529
723
  {
530
724
  "cell_type": "code",
531
- "execution_count": 10,
532
- "id": "e84f8946",
725
+ "execution_count": 6,
726
+ "id": "76ed7fd9",
533
727
  "metadata": {
534
728
  "execution": {
535
- "iopub.execute_input": "2025-04-21T21:32:32.797789Z",
536
- "iopub.status.busy": "2025-04-21T21:32:32.797610Z",
537
- "iopub.status.idle": "2025-04-21T21:32:49.165749Z",
538
- "shell.execute_reply": "2025-04-21T21:32:49.165293Z"
729
+ "iopub.execute_input": "2025-04-27T16:34:36.844528Z",
730
+ "iopub.status.busy": "2025-04-27T16:34:36.844403Z",
731
+ "iopub.status.idle": "2025-04-27T16:34:50.582612Z",
732
+ "shell.execute_reply": "2025-04-27T16:34:50.582308Z"
539
733
  }
540
734
  },
541
735
  "outputs": [
736
+ {
737
+ "data": {
738
+ "application/vnd.jupyter.widget-view+json": {
739
+ "model_id": "b32b0f4b5d9147bda684486bdba0dc0d",
740
+ "version_major": 2,
741
+ "version_minor": 0
742
+ },
743
+ "text/plain": [
744
+ "Rendering pages: 0%| | 0/1 [00:00<?, ?it/s]"
745
+ ]
746
+ },
747
+ "metadata": {},
748
+ "output_type": "display_data"
749
+ },
542
750
  {
543
751
  "name": "stderr",
544
752
  "output_type": "stream",
545
753
  "text": [
546
- "\u001b[2m2025-04-21T21:32:32.910436Z\u001b[0m [\u001b[33m\u001b[1mwarning \u001b[0m] \u001b[1mUsing CPU. Note: This module is much faster with a GPU.\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m71\u001b[0m \u001b[36mmodule\u001b[0m=\u001b[35measyocr.easyocr\u001b[0m\n"
754
+ "\u001b[2m2025-04-27T16:34:37.509530Z\u001b[0m [\u001b[33m\u001b[1mwarning \u001b[0m] \u001b[1mUsing CPU. Note: This module is much faster with a GPU.\u001b[0m \u001b[36mlineno\u001b[0m=\u001b[35m71\u001b[0m \u001b[36mmodule\u001b[0m=\u001b[35measyocr.easyocr\u001b[0m\n"
547
755
  ]
548
756
  },
549
757
  {
550
758
  "name": "stderr",
551
759
  "output_type": "stream",
552
760
  "text": [
553
- "[2025-04-21 17:32:32,910] [ WARNING] easyocr.py:71 - Using CPU. Note: This module is much faster with a GPU.\n"
761
+ "[2025-04-27 12:34:37,509] [ WARNING] easyocr.py:71 - Using CPU. Note: This module is much faster with a GPU.\n"
762
+ ]
763
+ },
764
+ {
765
+ "name": "stdout",
766
+ "output_type": "stream",
767
+ "text": [
768
+ "Saved searchable PDF to needs-ocr-searchable.pdf\n"
554
769
  ]
555
770
  }
556
771
  ],
@@ -560,14 +775,18 @@
560
775
  "input_pdf_path = \"https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/needs-ocr.pdf\"\n",
561
776
  "\n",
562
777
  "pdf = PDF(input_pdf_path)\n",
563
- "pdf.apply_ocr() \n",
778
+ "# Apply OCR to all pages before saving\n",
779
+ "# Use desired engine and options\n",
780
+ "pdf.apply_ocr(engine='easyocr', languages=['en'])\n",
781
+ "\n",
782
+ "pdf.save_searchable(\"needs-ocr-searchable.pdf\")\n",
564
783
  "\n",
565
- "pdf.save_searchable(\"needs-ocr-searchable.pdf\")"
784
+ "print(\"Saved searchable PDF to needs-ocr-searchable.pdf\")"
566
785
  ]
567
786
  },
568
787
  {
569
788
  "cell_type": "markdown",
570
- "id": "cd0b43ed",
789
+ "id": "db718c49",
571
790
  "metadata": {},
572
791
  "source": [
573
792
  "This creates `needs-ocr-searchable.pdf`, which looks identical to the original but now has a text layer corresponding to the OCR results. You can adjust the rendering resolution used during saving with the `dpi` parameter (default is 300).\n",
@@ -580,11 +799,12 @@
580
799
  "jupytext": {
581
800
  "cell_metadata_filter": "-all",
582
801
  "main_language": "python",
583
- "notebook_metadata_filter": "-all",
584
- "text_representation": {
585
- "extension": ".md",
586
- "format_name": "markdown"
587
- }
802
+ "notebook_metadata_filter": "-all"
803
+ },
804
+ "kernelspec": {
805
+ "display_name": "Python (natural-pdf)",
806
+ "language": "python",
807
+ "name": "natural-pdf"
588
808
  },
589
809
  "language_info": {
590
810
  "codemirror_mode": {
@@ -597,6 +817,2894 @@
597
817
  "nbconvert_exporter": "python",
598
818
  "pygments_lexer": "ipython3",
599
819
  "version": "3.10.13"
820
+ },
821
+ "widgets": {
822
+ "application/vnd.jupyter.widget-state+json": {
823
+ "state": {
824
+ "022a289a001e4e499b5d629364ec3cf2": {
825
+ "model_module": "@jupyter-widgets/base",
826
+ "model_module_version": "2.0.0",
827
+ "model_name": "LayoutModel",
828
+ "state": {
829
+ "_model_module": "@jupyter-widgets/base",
830
+ "_model_module_version": "2.0.0",
831
+ "_model_name": "LayoutModel",
832
+ "_view_count": null,
833
+ "_view_module": "@jupyter-widgets/base",
834
+ "_view_module_version": "2.0.0",
835
+ "_view_name": "LayoutView",
836
+ "align_content": null,
837
+ "align_items": null,
838
+ "align_self": null,
839
+ "border_bottom": null,
840
+ "border_left": null,
841
+ "border_right": null,
842
+ "border_top": null,
843
+ "bottom": null,
844
+ "display": null,
845
+ "flex": null,
846
+ "flex_flow": null,
847
+ "grid_area": null,
848
+ "grid_auto_columns": null,
849
+ "grid_auto_flow": null,
850
+ "grid_auto_rows": null,
851
+ "grid_column": null,
852
+ "grid_gap": null,
853
+ "grid_row": null,
854
+ "grid_template_areas": null,
855
+ "grid_template_columns": null,
856
+ "grid_template_rows": null,
857
+ "height": null,
858
+ "justify_content": null,
859
+ "justify_items": null,
860
+ "left": null,
861
+ "margin": null,
862
+ "max_height": null,
863
+ "max_width": null,
864
+ "min_height": null,
865
+ "min_width": null,
866
+ "object_fit": null,
867
+ "object_position": null,
868
+ "order": null,
869
+ "overflow": null,
870
+ "padding": null,
871
+ "right": null,
872
+ "top": null,
873
+ "visibility": null,
874
+ "width": null
875
+ }
876
+ },
877
+ "0447a8e29e7a42548b340e4ce078e56d": {
878
+ "model_module": "@jupyter-widgets/base",
879
+ "model_module_version": "2.0.0",
880
+ "model_name": "LayoutModel",
881
+ "state": {
882
+ "_model_module": "@jupyter-widgets/base",
883
+ "_model_module_version": "2.0.0",
884
+ "_model_name": "LayoutModel",
885
+ "_view_count": null,
886
+ "_view_module": "@jupyter-widgets/base",
887
+ "_view_module_version": "2.0.0",
888
+ "_view_name": "LayoutView",
889
+ "align_content": null,
890
+ "align_items": null,
891
+ "align_self": null,
892
+ "border_bottom": null,
893
+ "border_left": null,
894
+ "border_right": null,
895
+ "border_top": null,
896
+ "bottom": null,
897
+ "display": null,
898
+ "flex": null,
899
+ "flex_flow": null,
900
+ "grid_area": null,
901
+ "grid_auto_columns": null,
902
+ "grid_auto_flow": null,
903
+ "grid_auto_rows": null,
904
+ "grid_column": null,
905
+ "grid_gap": null,
906
+ "grid_row": null,
907
+ "grid_template_areas": null,
908
+ "grid_template_columns": null,
909
+ "grid_template_rows": null,
910
+ "height": null,
911
+ "justify_content": null,
912
+ "justify_items": null,
913
+ "left": null,
914
+ "margin": null,
915
+ "max_height": null,
916
+ "max_width": null,
917
+ "min_height": null,
918
+ "min_width": null,
919
+ "object_fit": null,
920
+ "object_position": null,
921
+ "order": null,
922
+ "overflow": null,
923
+ "padding": null,
924
+ "right": null,
925
+ "top": null,
926
+ "visibility": "hidden",
927
+ "width": null
928
+ }
929
+ },
930
+ "0a9ffb62a42d49ccbb896041f568e18c": {
931
+ "model_module": "@jupyter-widgets/controls",
932
+ "model_module_version": "2.0.0",
933
+ "model_name": "HTMLStyleModel",
934
+ "state": {
935
+ "_model_module": "@jupyter-widgets/controls",
936
+ "_model_module_version": "2.0.0",
937
+ "_model_name": "HTMLStyleModel",
938
+ "_view_count": null,
939
+ "_view_module": "@jupyter-widgets/base",
940
+ "_view_module_version": "2.0.0",
941
+ "_view_name": "StyleView",
942
+ "background": null,
943
+ "description_width": "",
944
+ "font_size": null,
945
+ "text_color": null
946
+ }
947
+ },
948
+ "0e449b943f2d48eb9c9ed2d6bdfdb557": {
949
+ "model_module": "@jupyter-widgets/base",
950
+ "model_module_version": "2.0.0",
951
+ "model_name": "LayoutModel",
952
+ "state": {
953
+ "_model_module": "@jupyter-widgets/base",
954
+ "_model_module_version": "2.0.0",
955
+ "_model_name": "LayoutModel",
956
+ "_view_count": null,
957
+ "_view_module": "@jupyter-widgets/base",
958
+ "_view_module_version": "2.0.0",
959
+ "_view_name": "LayoutView",
960
+ "align_content": null,
961
+ "align_items": null,
962
+ "align_self": null,
963
+ "border_bottom": null,
964
+ "border_left": null,
965
+ "border_right": null,
966
+ "border_top": null,
967
+ "bottom": null,
968
+ "display": null,
969
+ "flex": null,
970
+ "flex_flow": null,
971
+ "grid_area": null,
972
+ "grid_auto_columns": null,
973
+ "grid_auto_flow": null,
974
+ "grid_auto_rows": null,
975
+ "grid_column": null,
976
+ "grid_gap": null,
977
+ "grid_row": null,
978
+ "grid_template_areas": null,
979
+ "grid_template_columns": null,
980
+ "grid_template_rows": null,
981
+ "height": null,
982
+ "justify_content": null,
983
+ "justify_items": null,
984
+ "left": null,
985
+ "margin": null,
986
+ "max_height": null,
987
+ "max_width": null,
988
+ "min_height": null,
989
+ "min_width": null,
990
+ "object_fit": null,
991
+ "object_position": null,
992
+ "order": null,
993
+ "overflow": null,
994
+ "padding": null,
995
+ "right": null,
996
+ "top": null,
997
+ "visibility": null,
998
+ "width": null
999
+ }
1000
+ },
1001
+ "1c18d689ebb846d4908b0ad1e05838e5": {
1002
+ "model_module": "@jupyter-widgets/controls",
1003
+ "model_module_version": "2.0.0",
1004
+ "model_name": "FloatProgressModel",
1005
+ "state": {
1006
+ "_dom_classes": [],
1007
+ "_model_module": "@jupyter-widgets/controls",
1008
+ "_model_module_version": "2.0.0",
1009
+ "_model_name": "FloatProgressModel",
1010
+ "_view_count": null,
1011
+ "_view_module": "@jupyter-widgets/controls",
1012
+ "_view_module_version": "2.0.0",
1013
+ "_view_name": "ProgressView",
1014
+ "bar_style": "",
1015
+ "description": "",
1016
+ "description_allow_html": false,
1017
+ "layout": "IPY_MODEL_e637d6c0df174f33917c5a207172fa2b",
1018
+ "max": 1.0,
1019
+ "min": 0.0,
1020
+ "orientation": "horizontal",
1021
+ "style": "IPY_MODEL_9dabb33d83a94026b0643537203a019b",
1022
+ "tabbable": null,
1023
+ "tooltip": null,
1024
+ "value": 1.0
1025
+ }
1026
+ },
1027
+ "2604f98986f34efab6df0e7dbf9ed4f5": {
1028
+ "model_module": "@jupyter-widgets/controls",
1029
+ "model_module_version": "2.0.0",
1030
+ "model_name": "HBoxModel",
1031
+ "state": {
1032
+ "_dom_classes": [],
1033
+ "_model_module": "@jupyter-widgets/controls",
1034
+ "_model_module_version": "2.0.0",
1035
+ "_model_name": "HBoxModel",
1036
+ "_view_count": null,
1037
+ "_view_module": "@jupyter-widgets/controls",
1038
+ "_view_module_version": "2.0.0",
1039
+ "_view_name": "HBoxView",
1040
+ "box_style": "",
1041
+ "children": [
1042
+ "IPY_MODEL_99d50723f6b9434f8f6eed613f243556",
1043
+ "IPY_MODEL_38fb7515d640425e8fc8562d780237f5",
1044
+ "IPY_MODEL_d8adc464cd1c4e29b0b5c729c9c4b2d8"
1045
+ ],
1046
+ "layout": "IPY_MODEL_0447a8e29e7a42548b340e4ce078e56d",
1047
+ "tabbable": null,
1048
+ "tooltip": null
1049
+ }
1050
+ },
1051
+ "2610164fb3f7466985a46215b5b2bfa8": {
1052
+ "model_module": "@jupyter-widgets/controls",
1053
+ "model_module_version": "2.0.0",
1054
+ "model_name": "HBoxModel",
1055
+ "state": {
1056
+ "_dom_classes": [],
1057
+ "_model_module": "@jupyter-widgets/controls",
1058
+ "_model_module_version": "2.0.0",
1059
+ "_model_name": "HBoxModel",
1060
+ "_view_count": null,
1061
+ "_view_module": "@jupyter-widgets/controls",
1062
+ "_view_module_version": "2.0.0",
1063
+ "_view_name": "HBoxView",
1064
+ "box_style": "",
1065
+ "children": [
1066
+ "IPY_MODEL_c084e0a81dc0489e907da2ee73c66d1d",
1067
+ "IPY_MODEL_44e0e5f920824c0f889a80271bebc85a",
1068
+ "IPY_MODEL_7ebc9803f2534971995c7ed7eaa4e0b9"
1069
+ ],
1070
+ "layout": "IPY_MODEL_fc795c53429c43328aceb24e9478dbf6",
1071
+ "tabbable": null,
1072
+ "tooltip": null
1073
+ }
1074
+ },
1075
+ "273e9468cce1450ca0d9a42967a799f8": {
1076
+ "model_module": "@jupyter-widgets/base",
1077
+ "model_module_version": "2.0.0",
1078
+ "model_name": "LayoutModel",
1079
+ "state": {
1080
+ "_model_module": "@jupyter-widgets/base",
1081
+ "_model_module_version": "2.0.0",
1082
+ "_model_name": "LayoutModel",
1083
+ "_view_count": null,
1084
+ "_view_module": "@jupyter-widgets/base",
1085
+ "_view_module_version": "2.0.0",
1086
+ "_view_name": "LayoutView",
1087
+ "align_content": null,
1088
+ "align_items": null,
1089
+ "align_self": null,
1090
+ "border_bottom": null,
1091
+ "border_left": null,
1092
+ "border_right": null,
1093
+ "border_top": null,
1094
+ "bottom": null,
1095
+ "display": null,
1096
+ "flex": null,
1097
+ "flex_flow": null,
1098
+ "grid_area": null,
1099
+ "grid_auto_columns": null,
1100
+ "grid_auto_flow": null,
1101
+ "grid_auto_rows": null,
1102
+ "grid_column": null,
1103
+ "grid_gap": null,
1104
+ "grid_row": null,
1105
+ "grid_template_areas": null,
1106
+ "grid_template_columns": null,
1107
+ "grid_template_rows": null,
1108
+ "height": null,
1109
+ "justify_content": null,
1110
+ "justify_items": null,
1111
+ "left": null,
1112
+ "margin": null,
1113
+ "max_height": null,
1114
+ "max_width": null,
1115
+ "min_height": null,
1116
+ "min_width": null,
1117
+ "object_fit": null,
1118
+ "object_position": null,
1119
+ "order": null,
1120
+ "overflow": null,
1121
+ "padding": null,
1122
+ "right": null,
1123
+ "top": null,
1124
+ "visibility": null,
1125
+ "width": null
1126
+ }
1127
+ },
1128
+ "2d77ac4edf924b9d88b97929227b2497": {
1129
+ "model_module": "@jupyter-widgets/controls",
1130
+ "model_module_version": "2.0.0",
1131
+ "model_name": "ProgressStyleModel",
1132
+ "state": {
1133
+ "_model_module": "@jupyter-widgets/controls",
1134
+ "_model_module_version": "2.0.0",
1135
+ "_model_name": "ProgressStyleModel",
1136
+ "_view_count": null,
1137
+ "_view_module": "@jupyter-widgets/base",
1138
+ "_view_module_version": "2.0.0",
1139
+ "_view_name": "StyleView",
1140
+ "bar_color": null,
1141
+ "description_width": ""
1142
+ }
1143
+ },
1144
+ "2e8dbacbfbbc4381b0bbd0c0427ee370": {
1145
+ "model_module": "@jupyter-widgets/base",
1146
+ "model_module_version": "2.0.0",
1147
+ "model_name": "LayoutModel",
1148
+ "state": {
1149
+ "_model_module": "@jupyter-widgets/base",
1150
+ "_model_module_version": "2.0.0",
1151
+ "_model_name": "LayoutModel",
1152
+ "_view_count": null,
1153
+ "_view_module": "@jupyter-widgets/base",
1154
+ "_view_module_version": "2.0.0",
1155
+ "_view_name": "LayoutView",
1156
+ "align_content": null,
1157
+ "align_items": null,
1158
+ "align_self": null,
1159
+ "border_bottom": null,
1160
+ "border_left": null,
1161
+ "border_right": null,
1162
+ "border_top": null,
1163
+ "bottom": null,
1164
+ "display": null,
1165
+ "flex": null,
1166
+ "flex_flow": null,
1167
+ "grid_area": null,
1168
+ "grid_auto_columns": null,
1169
+ "grid_auto_flow": null,
1170
+ "grid_auto_rows": null,
1171
+ "grid_column": null,
1172
+ "grid_gap": null,
1173
+ "grid_row": null,
1174
+ "grid_template_areas": null,
1175
+ "grid_template_columns": null,
1176
+ "grid_template_rows": null,
1177
+ "height": null,
1178
+ "justify_content": null,
1179
+ "justify_items": null,
1180
+ "left": null,
1181
+ "margin": null,
1182
+ "max_height": null,
1183
+ "max_width": null,
1184
+ "min_height": null,
1185
+ "min_width": null,
1186
+ "object_fit": null,
1187
+ "object_position": null,
1188
+ "order": null,
1189
+ "overflow": null,
1190
+ "padding": null,
1191
+ "right": null,
1192
+ "top": null,
1193
+ "visibility": null,
1194
+ "width": null
1195
+ }
1196
+ },
1197
+ "3571a316fa3d401a9abd519a9bef510c": {
1198
+ "model_module": "@jupyter-widgets/controls",
1199
+ "model_module_version": "2.0.0",
1200
+ "model_name": "HTMLModel",
1201
+ "state": {
1202
+ "_dom_classes": [],
1203
+ "_model_module": "@jupyter-widgets/controls",
1204
+ "_model_module_version": "2.0.0",
1205
+ "_model_name": "HTMLModel",
1206
+ "_view_count": null,
1207
+ "_view_module": "@jupyter-widgets/controls",
1208
+ "_view_module_version": "2.0.0",
1209
+ "_view_name": "HTMLView",
1210
+ "description": "",
1211
+ "description_allow_html": false,
1212
+ "layout": "IPY_MODEL_273e9468cce1450ca0d9a42967a799f8",
1213
+ "placeholder": "​",
1214
+ "style": "IPY_MODEL_e092a03a328f4899b2b43c5b4e3695af",
1215
+ "tabbable": null,
1216
+ "tooltip": null,
1217
+ "value": " 0/1 [00:00&lt;?, ?it/s]"
1218
+ }
1219
+ },
1220
+ "381d045b3faa4d34b4395a11a55cc20d": {
1221
+ "model_module": "@jupyter-widgets/base",
1222
+ "model_module_version": "2.0.0",
1223
+ "model_name": "LayoutModel",
1224
+ "state": {
1225
+ "_model_module": "@jupyter-widgets/base",
1226
+ "_model_module_version": "2.0.0",
1227
+ "_model_name": "LayoutModel",
1228
+ "_view_count": null,
1229
+ "_view_module": "@jupyter-widgets/base",
1230
+ "_view_module_version": "2.0.0",
1231
+ "_view_name": "LayoutView",
1232
+ "align_content": null,
1233
+ "align_items": null,
1234
+ "align_self": null,
1235
+ "border_bottom": null,
1236
+ "border_left": null,
1237
+ "border_right": null,
1238
+ "border_top": null,
1239
+ "bottom": null,
1240
+ "display": null,
1241
+ "flex": null,
1242
+ "flex_flow": null,
1243
+ "grid_area": null,
1244
+ "grid_auto_columns": null,
1245
+ "grid_auto_flow": null,
1246
+ "grid_auto_rows": null,
1247
+ "grid_column": null,
1248
+ "grid_gap": null,
1249
+ "grid_row": null,
1250
+ "grid_template_areas": null,
1251
+ "grid_template_columns": null,
1252
+ "grid_template_rows": null,
1253
+ "height": null,
1254
+ "justify_content": null,
1255
+ "justify_items": null,
1256
+ "left": null,
1257
+ "margin": null,
1258
+ "max_height": null,
1259
+ "max_width": null,
1260
+ "min_height": null,
1261
+ "min_width": null,
1262
+ "object_fit": null,
1263
+ "object_position": null,
1264
+ "order": null,
1265
+ "overflow": null,
1266
+ "padding": null,
1267
+ "right": null,
1268
+ "top": null,
1269
+ "visibility": null,
1270
+ "width": null
1271
+ }
1272
+ },
1273
+ "38c1448b20af44e29a486c39171a00e0": {
1274
+ "model_module": "@jupyter-widgets/controls",
1275
+ "model_module_version": "2.0.0",
1276
+ "model_name": "ProgressStyleModel",
1277
+ "state": {
1278
+ "_model_module": "@jupyter-widgets/controls",
1279
+ "_model_module_version": "2.0.0",
1280
+ "_model_name": "ProgressStyleModel",
1281
+ "_view_count": null,
1282
+ "_view_module": "@jupyter-widgets/base",
1283
+ "_view_module_version": "2.0.0",
1284
+ "_view_name": "StyleView",
1285
+ "bar_color": null,
1286
+ "description_width": ""
1287
+ }
1288
+ },
1289
+ "38fb7515d640425e8fc8562d780237f5": {
1290
+ "model_module": "@jupyter-widgets/controls",
1291
+ "model_module_version": "2.0.0",
1292
+ "model_name": "FloatProgressModel",
1293
+ "state": {
1294
+ "_dom_classes": [],
1295
+ "_model_module": "@jupyter-widgets/controls",
1296
+ "_model_module_version": "2.0.0",
1297
+ "_model_name": "FloatProgressModel",
1298
+ "_view_count": null,
1299
+ "_view_module": "@jupyter-widgets/controls",
1300
+ "_view_module_version": "2.0.0",
1301
+ "_view_name": "ProgressView",
1302
+ "bar_style": "",
1303
+ "description": "",
1304
+ "description_allow_html": false,
1305
+ "layout": "IPY_MODEL_6c32dab276b147dc9ed4eeaf49f8637f",
1306
+ "max": 1.0,
1307
+ "min": 0.0,
1308
+ "orientation": "horizontal",
1309
+ "style": "IPY_MODEL_b35dccbf46ae4fd8a7e0cadfbfbfd626",
1310
+ "tabbable": null,
1311
+ "tooltip": null,
1312
+ "value": 1.0
1313
+ }
1314
+ },
1315
+ "44dbb66e4ff14e97bd956c995d2d8e16": {
1316
+ "model_module": "@jupyter-widgets/controls",
1317
+ "model_module_version": "2.0.0",
1318
+ "model_name": "HTMLStyleModel",
1319
+ "state": {
1320
+ "_model_module": "@jupyter-widgets/controls",
1321
+ "_model_module_version": "2.0.0",
1322
+ "_model_name": "HTMLStyleModel",
1323
+ "_view_count": null,
1324
+ "_view_module": "@jupyter-widgets/base",
1325
+ "_view_module_version": "2.0.0",
1326
+ "_view_name": "StyleView",
1327
+ "background": null,
1328
+ "description_width": "",
1329
+ "font_size": null,
1330
+ "text_color": null
1331
+ }
1332
+ },
1333
+ "44e0e5f920824c0f889a80271bebc85a": {
1334
+ "model_module": "@jupyter-widgets/controls",
1335
+ "model_module_version": "2.0.0",
1336
+ "model_name": "FloatProgressModel",
1337
+ "state": {
1338
+ "_dom_classes": [],
1339
+ "_model_module": "@jupyter-widgets/controls",
1340
+ "_model_module_version": "2.0.0",
1341
+ "_model_name": "FloatProgressModel",
1342
+ "_view_count": null,
1343
+ "_view_module": "@jupyter-widgets/controls",
1344
+ "_view_module_version": "2.0.0",
1345
+ "_view_name": "ProgressView",
1346
+ "bar_style": "",
1347
+ "description": "",
1348
+ "description_allow_html": false,
1349
+ "layout": "IPY_MODEL_e985937b7e3b4e6196ebb516634b6d3f",
1350
+ "max": 1.0,
1351
+ "min": 0.0,
1352
+ "orientation": "horizontal",
1353
+ "style": "IPY_MODEL_38c1448b20af44e29a486c39171a00e0",
1354
+ "tabbable": null,
1355
+ "tooltip": null,
1356
+ "value": 1.0
1357
+ }
1358
+ },
1359
+ "4b113c9d48ef40338ee8c3154d85ad6b": {
1360
+ "model_module": "@jupyter-widgets/controls",
1361
+ "model_module_version": "2.0.0",
1362
+ "model_name": "ProgressStyleModel",
1363
+ "state": {
1364
+ "_model_module": "@jupyter-widgets/controls",
1365
+ "_model_module_version": "2.0.0",
1366
+ "_model_name": "ProgressStyleModel",
1367
+ "_view_count": null,
1368
+ "_view_module": "@jupyter-widgets/base",
1369
+ "_view_module_version": "2.0.0",
1370
+ "_view_name": "StyleView",
1371
+ "bar_color": null,
1372
+ "description_width": ""
1373
+ }
1374
+ },
1375
+ "4b46ab41557547e8b1afacd9d4a4b370": {
1376
+ "model_module": "@jupyter-widgets/controls",
1377
+ "model_module_version": "2.0.0",
1378
+ "model_name": "FloatProgressModel",
1379
+ "state": {
1380
+ "_dom_classes": [],
1381
+ "_model_module": "@jupyter-widgets/controls",
1382
+ "_model_module_version": "2.0.0",
1383
+ "_model_name": "FloatProgressModel",
1384
+ "_view_count": null,
1385
+ "_view_module": "@jupyter-widgets/controls",
1386
+ "_view_module_version": "2.0.0",
1387
+ "_view_name": "ProgressView",
1388
+ "bar_style": "",
1389
+ "description": "",
1390
+ "description_allow_html": false,
1391
+ "layout": "IPY_MODEL_55a0608b33ed41e59e3769c5ca572640",
1392
+ "max": 1.0,
1393
+ "min": 0.0,
1394
+ "orientation": "horizontal",
1395
+ "style": "IPY_MODEL_8dadb7c8de2c4d098037f3a10cb18482",
1396
+ "tabbable": null,
1397
+ "tooltip": null,
1398
+ "value": 1.0
1399
+ }
1400
+ },
1401
+ "4c923325c4174be587eb87fa69fb9d34": {
1402
+ "model_module": "@jupyter-widgets/controls",
1403
+ "model_module_version": "2.0.0",
1404
+ "model_name": "HTMLStyleModel",
1405
+ "state": {
1406
+ "_model_module": "@jupyter-widgets/controls",
1407
+ "_model_module_version": "2.0.0",
1408
+ "_model_name": "HTMLStyleModel",
1409
+ "_view_count": null,
1410
+ "_view_module": "@jupyter-widgets/base",
1411
+ "_view_module_version": "2.0.0",
1412
+ "_view_name": "StyleView",
1413
+ "background": null,
1414
+ "description_width": "",
1415
+ "font_size": null,
1416
+ "text_color": null
1417
+ }
1418
+ },
1419
+ "4e25f956fccc4b1b969fbaa7f290492a": {
1420
+ "model_module": "@jupyter-widgets/controls",
1421
+ "model_module_version": "2.0.0",
1422
+ "model_name": "HTMLModel",
1423
+ "state": {
1424
+ "_dom_classes": [],
1425
+ "_model_module": "@jupyter-widgets/controls",
1426
+ "_model_module_version": "2.0.0",
1427
+ "_model_name": "HTMLModel",
1428
+ "_view_count": null,
1429
+ "_view_module": "@jupyter-widgets/controls",
1430
+ "_view_module_version": "2.0.0",
1431
+ "_view_name": "HTMLView",
1432
+ "description": "",
1433
+ "description_allow_html": false,
1434
+ "layout": "IPY_MODEL_52ce23907cd741a3a637e58e9649be7e",
1435
+ "placeholder": "​",
1436
+ "style": "IPY_MODEL_4c923325c4174be587eb87fa69fb9d34",
1437
+ "tabbable": null,
1438
+ "tooltip": null,
1439
+ "value": " 0/1 [00:00&lt;?, ?it/s]"
1440
+ }
1441
+ },
1442
+ "51463ffe39094a4c86e60da0f11cdbdd": {
1443
+ "model_module": "@jupyter-widgets/controls",
1444
+ "model_module_version": "2.0.0",
1445
+ "model_name": "HTMLStyleModel",
1446
+ "state": {
1447
+ "_model_module": "@jupyter-widgets/controls",
1448
+ "_model_module_version": "2.0.0",
1449
+ "_model_name": "HTMLStyleModel",
1450
+ "_view_count": null,
1451
+ "_view_module": "@jupyter-widgets/base",
1452
+ "_view_module_version": "2.0.0",
1453
+ "_view_name": "StyleView",
1454
+ "background": null,
1455
+ "description_width": "",
1456
+ "font_size": null,
1457
+ "text_color": null
1458
+ }
1459
+ },
1460
+ "52ce23907cd741a3a637e58e9649be7e": {
1461
+ "model_module": "@jupyter-widgets/base",
1462
+ "model_module_version": "2.0.0",
1463
+ "model_name": "LayoutModel",
1464
+ "state": {
1465
+ "_model_module": "@jupyter-widgets/base",
1466
+ "_model_module_version": "2.0.0",
1467
+ "_model_name": "LayoutModel",
1468
+ "_view_count": null,
1469
+ "_view_module": "@jupyter-widgets/base",
1470
+ "_view_module_version": "2.0.0",
1471
+ "_view_name": "LayoutView",
1472
+ "align_content": null,
1473
+ "align_items": null,
1474
+ "align_self": null,
1475
+ "border_bottom": null,
1476
+ "border_left": null,
1477
+ "border_right": null,
1478
+ "border_top": null,
1479
+ "bottom": null,
1480
+ "display": null,
1481
+ "flex": null,
1482
+ "flex_flow": null,
1483
+ "grid_area": null,
1484
+ "grid_auto_columns": null,
1485
+ "grid_auto_flow": null,
1486
+ "grid_auto_rows": null,
1487
+ "grid_column": null,
1488
+ "grid_gap": null,
1489
+ "grid_row": null,
1490
+ "grid_template_areas": null,
1491
+ "grid_template_columns": null,
1492
+ "grid_template_rows": null,
1493
+ "height": null,
1494
+ "justify_content": null,
1495
+ "justify_items": null,
1496
+ "left": null,
1497
+ "margin": null,
1498
+ "max_height": null,
1499
+ "max_width": null,
1500
+ "min_height": null,
1501
+ "min_width": null,
1502
+ "object_fit": null,
1503
+ "object_position": null,
1504
+ "order": null,
1505
+ "overflow": null,
1506
+ "padding": null,
1507
+ "right": null,
1508
+ "top": null,
1509
+ "visibility": null,
1510
+ "width": null
1511
+ }
1512
+ },
1513
+ "55a0608b33ed41e59e3769c5ca572640": {
1514
+ "model_module": "@jupyter-widgets/base",
1515
+ "model_module_version": "2.0.0",
1516
+ "model_name": "LayoutModel",
1517
+ "state": {
1518
+ "_model_module": "@jupyter-widgets/base",
1519
+ "_model_module_version": "2.0.0",
1520
+ "_model_name": "LayoutModel",
1521
+ "_view_count": null,
1522
+ "_view_module": "@jupyter-widgets/base",
1523
+ "_view_module_version": "2.0.0",
1524
+ "_view_name": "LayoutView",
1525
+ "align_content": null,
1526
+ "align_items": null,
1527
+ "align_self": null,
1528
+ "border_bottom": null,
1529
+ "border_left": null,
1530
+ "border_right": null,
1531
+ "border_top": null,
1532
+ "bottom": null,
1533
+ "display": null,
1534
+ "flex": null,
1535
+ "flex_flow": null,
1536
+ "grid_area": null,
1537
+ "grid_auto_columns": null,
1538
+ "grid_auto_flow": null,
1539
+ "grid_auto_rows": null,
1540
+ "grid_column": null,
1541
+ "grid_gap": null,
1542
+ "grid_row": null,
1543
+ "grid_template_areas": null,
1544
+ "grid_template_columns": null,
1545
+ "grid_template_rows": null,
1546
+ "height": null,
1547
+ "justify_content": null,
1548
+ "justify_items": null,
1549
+ "left": null,
1550
+ "margin": null,
1551
+ "max_height": null,
1552
+ "max_width": null,
1553
+ "min_height": null,
1554
+ "min_width": null,
1555
+ "object_fit": null,
1556
+ "object_position": null,
1557
+ "order": null,
1558
+ "overflow": null,
1559
+ "padding": null,
1560
+ "right": null,
1561
+ "top": null,
1562
+ "visibility": null,
1563
+ "width": null
1564
+ }
1565
+ },
1566
+ "560328fa0add43ac9de66469923fe4c5": {
1567
+ "model_module": "@jupyter-widgets/controls",
1568
+ "model_module_version": "2.0.0",
1569
+ "model_name": "HTMLStyleModel",
1570
+ "state": {
1571
+ "_model_module": "@jupyter-widgets/controls",
1572
+ "_model_module_version": "2.0.0",
1573
+ "_model_name": "HTMLStyleModel",
1574
+ "_view_count": null,
1575
+ "_view_module": "@jupyter-widgets/base",
1576
+ "_view_module_version": "2.0.0",
1577
+ "_view_name": "StyleView",
1578
+ "background": null,
1579
+ "description_width": "",
1580
+ "font_size": null,
1581
+ "text_color": null
1582
+ }
1583
+ },
1584
+ "577ee67e9cee48bd87b596e7659103f9": {
1585
+ "model_module": "@jupyter-widgets/base",
1586
+ "model_module_version": "2.0.0",
1587
+ "model_name": "LayoutModel",
1588
+ "state": {
1589
+ "_model_module": "@jupyter-widgets/base",
1590
+ "_model_module_version": "2.0.0",
1591
+ "_model_name": "LayoutModel",
1592
+ "_view_count": null,
1593
+ "_view_module": "@jupyter-widgets/base",
1594
+ "_view_module_version": "2.0.0",
1595
+ "_view_name": "LayoutView",
1596
+ "align_content": null,
1597
+ "align_items": null,
1598
+ "align_self": null,
1599
+ "border_bottom": null,
1600
+ "border_left": null,
1601
+ "border_right": null,
1602
+ "border_top": null,
1603
+ "bottom": null,
1604
+ "display": null,
1605
+ "flex": null,
1606
+ "flex_flow": null,
1607
+ "grid_area": null,
1608
+ "grid_auto_columns": null,
1609
+ "grid_auto_flow": null,
1610
+ "grid_auto_rows": null,
1611
+ "grid_column": null,
1612
+ "grid_gap": null,
1613
+ "grid_row": null,
1614
+ "grid_template_areas": null,
1615
+ "grid_template_columns": null,
1616
+ "grid_template_rows": null,
1617
+ "height": null,
1618
+ "justify_content": null,
1619
+ "justify_items": null,
1620
+ "left": null,
1621
+ "margin": null,
1622
+ "max_height": null,
1623
+ "max_width": null,
1624
+ "min_height": null,
1625
+ "min_width": null,
1626
+ "object_fit": null,
1627
+ "object_position": null,
1628
+ "order": null,
1629
+ "overflow": null,
1630
+ "padding": null,
1631
+ "right": null,
1632
+ "top": null,
1633
+ "visibility": "hidden",
1634
+ "width": null
1635
+ }
1636
+ },
1637
+ "5c4525f9c8664981bd3680eed1cf154d": {
1638
+ "model_module": "@jupyter-widgets/controls",
1639
+ "model_module_version": "2.0.0",
1640
+ "model_name": "ProgressStyleModel",
1641
+ "state": {
1642
+ "_model_module": "@jupyter-widgets/controls",
1643
+ "_model_module_version": "2.0.0",
1644
+ "_model_name": "ProgressStyleModel",
1645
+ "_view_count": null,
1646
+ "_view_module": "@jupyter-widgets/base",
1647
+ "_view_module_version": "2.0.0",
1648
+ "_view_name": "StyleView",
1649
+ "bar_color": null,
1650
+ "description_width": ""
1651
+ }
1652
+ },
1653
+ "5e0e050cf7bc45219d785a3ab1f10bec": {
1654
+ "model_module": "@jupyter-widgets/base",
1655
+ "model_module_version": "2.0.0",
1656
+ "model_name": "LayoutModel",
1657
+ "state": {
1658
+ "_model_module": "@jupyter-widgets/base",
1659
+ "_model_module_version": "2.0.0",
1660
+ "_model_name": "LayoutModel",
1661
+ "_view_count": null,
1662
+ "_view_module": "@jupyter-widgets/base",
1663
+ "_view_module_version": "2.0.0",
1664
+ "_view_name": "LayoutView",
1665
+ "align_content": null,
1666
+ "align_items": null,
1667
+ "align_self": null,
1668
+ "border_bottom": null,
1669
+ "border_left": null,
1670
+ "border_right": null,
1671
+ "border_top": null,
1672
+ "bottom": null,
1673
+ "display": null,
1674
+ "flex": null,
1675
+ "flex_flow": null,
1676
+ "grid_area": null,
1677
+ "grid_auto_columns": null,
1678
+ "grid_auto_flow": null,
1679
+ "grid_auto_rows": null,
1680
+ "grid_column": null,
1681
+ "grid_gap": null,
1682
+ "grid_row": null,
1683
+ "grid_template_areas": null,
1684
+ "grid_template_columns": null,
1685
+ "grid_template_rows": null,
1686
+ "height": null,
1687
+ "justify_content": null,
1688
+ "justify_items": null,
1689
+ "left": null,
1690
+ "margin": null,
1691
+ "max_height": null,
1692
+ "max_width": null,
1693
+ "min_height": null,
1694
+ "min_width": null,
1695
+ "object_fit": null,
1696
+ "object_position": null,
1697
+ "order": null,
1698
+ "overflow": null,
1699
+ "padding": null,
1700
+ "right": null,
1701
+ "top": null,
1702
+ "visibility": null,
1703
+ "width": null
1704
+ }
1705
+ },
1706
+ "5e40bef048f3441c99ae91260a13f545": {
1707
+ "model_module": "@jupyter-widgets/controls",
1708
+ "model_module_version": "2.0.0",
1709
+ "model_name": "HBoxModel",
1710
+ "state": {
1711
+ "_dom_classes": [],
1712
+ "_model_module": "@jupyter-widgets/controls",
1713
+ "_model_module_version": "2.0.0",
1714
+ "_model_name": "HBoxModel",
1715
+ "_view_count": null,
1716
+ "_view_module": "@jupyter-widgets/controls",
1717
+ "_view_module_version": "2.0.0",
1718
+ "_view_name": "HBoxView",
1719
+ "box_style": "",
1720
+ "children": [
1721
+ "IPY_MODEL_ef1cff32aed04ed89901e962c397b3ef",
1722
+ "IPY_MODEL_4b46ab41557547e8b1afacd9d4a4b370",
1723
+ "IPY_MODEL_3571a316fa3d401a9abd519a9bef510c"
1724
+ ],
1725
+ "layout": "IPY_MODEL_577ee67e9cee48bd87b596e7659103f9",
1726
+ "tabbable": null,
1727
+ "tooltip": null
1728
+ }
1729
+ },
1730
+ "6096733735134f0f8af21b42caf09301": {
1731
+ "model_module": "@jupyter-widgets/controls",
1732
+ "model_module_version": "2.0.0",
1733
+ "model_name": "HTMLStyleModel",
1734
+ "state": {
1735
+ "_model_module": "@jupyter-widgets/controls",
1736
+ "_model_module_version": "2.0.0",
1737
+ "_model_name": "HTMLStyleModel",
1738
+ "_view_count": null,
1739
+ "_view_module": "@jupyter-widgets/base",
1740
+ "_view_module_version": "2.0.0",
1741
+ "_view_name": "StyleView",
1742
+ "background": null,
1743
+ "description_width": "",
1744
+ "font_size": null,
1745
+ "text_color": null
1746
+ }
1747
+ },
1748
+ "6475f425e96b4974a7f1fd4dc7445314": {
1749
+ "model_module": "@jupyter-widgets/base",
1750
+ "model_module_version": "2.0.0",
1751
+ "model_name": "LayoutModel",
1752
+ "state": {
1753
+ "_model_module": "@jupyter-widgets/base",
1754
+ "_model_module_version": "2.0.0",
1755
+ "_model_name": "LayoutModel",
1756
+ "_view_count": null,
1757
+ "_view_module": "@jupyter-widgets/base",
1758
+ "_view_module_version": "2.0.0",
1759
+ "_view_name": "LayoutView",
1760
+ "align_content": null,
1761
+ "align_items": null,
1762
+ "align_self": null,
1763
+ "border_bottom": null,
1764
+ "border_left": null,
1765
+ "border_right": null,
1766
+ "border_top": null,
1767
+ "bottom": null,
1768
+ "display": null,
1769
+ "flex": null,
1770
+ "flex_flow": null,
1771
+ "grid_area": null,
1772
+ "grid_auto_columns": null,
1773
+ "grid_auto_flow": null,
1774
+ "grid_auto_rows": null,
1775
+ "grid_column": null,
1776
+ "grid_gap": null,
1777
+ "grid_row": null,
1778
+ "grid_template_areas": null,
1779
+ "grid_template_columns": null,
1780
+ "grid_template_rows": null,
1781
+ "height": null,
1782
+ "justify_content": null,
1783
+ "justify_items": null,
1784
+ "left": null,
1785
+ "margin": null,
1786
+ "max_height": null,
1787
+ "max_width": null,
1788
+ "min_height": null,
1789
+ "min_width": null,
1790
+ "object_fit": null,
1791
+ "object_position": null,
1792
+ "order": null,
1793
+ "overflow": null,
1794
+ "padding": null,
1795
+ "right": null,
1796
+ "top": null,
1797
+ "visibility": "hidden",
1798
+ "width": null
1799
+ }
1800
+ },
1801
+ "6c32dab276b147dc9ed4eeaf49f8637f": {
1802
+ "model_module": "@jupyter-widgets/base",
1803
+ "model_module_version": "2.0.0",
1804
+ "model_name": "LayoutModel",
1805
+ "state": {
1806
+ "_model_module": "@jupyter-widgets/base",
1807
+ "_model_module_version": "2.0.0",
1808
+ "_model_name": "LayoutModel",
1809
+ "_view_count": null,
1810
+ "_view_module": "@jupyter-widgets/base",
1811
+ "_view_module_version": "2.0.0",
1812
+ "_view_name": "LayoutView",
1813
+ "align_content": null,
1814
+ "align_items": null,
1815
+ "align_self": null,
1816
+ "border_bottom": null,
1817
+ "border_left": null,
1818
+ "border_right": null,
1819
+ "border_top": null,
1820
+ "bottom": null,
1821
+ "display": null,
1822
+ "flex": null,
1823
+ "flex_flow": null,
1824
+ "grid_area": null,
1825
+ "grid_auto_columns": null,
1826
+ "grid_auto_flow": null,
1827
+ "grid_auto_rows": null,
1828
+ "grid_column": null,
1829
+ "grid_gap": null,
1830
+ "grid_row": null,
1831
+ "grid_template_areas": null,
1832
+ "grid_template_columns": null,
1833
+ "grid_template_rows": null,
1834
+ "height": null,
1835
+ "justify_content": null,
1836
+ "justify_items": null,
1837
+ "left": null,
1838
+ "margin": null,
1839
+ "max_height": null,
1840
+ "max_width": null,
1841
+ "min_height": null,
1842
+ "min_width": null,
1843
+ "object_fit": null,
1844
+ "object_position": null,
1845
+ "order": null,
1846
+ "overflow": null,
1847
+ "padding": null,
1848
+ "right": null,
1849
+ "top": null,
1850
+ "visibility": null,
1851
+ "width": null
1852
+ }
1853
+ },
1854
+ "6f810f1c862f4d388328c29487bef3b7": {
1855
+ "model_module": "@jupyter-widgets/controls",
1856
+ "model_module_version": "2.0.0",
1857
+ "model_name": "HTMLStyleModel",
1858
+ "state": {
1859
+ "_model_module": "@jupyter-widgets/controls",
1860
+ "_model_module_version": "2.0.0",
1861
+ "_model_name": "HTMLStyleModel",
1862
+ "_view_count": null,
1863
+ "_view_module": "@jupyter-widgets/base",
1864
+ "_view_module_version": "2.0.0",
1865
+ "_view_name": "StyleView",
1866
+ "background": null,
1867
+ "description_width": "",
1868
+ "font_size": null,
1869
+ "text_color": null
1870
+ }
1871
+ },
1872
+ "6fb12657e4a94a6482078b9217db42cd": {
1873
+ "model_module": "@jupyter-widgets/controls",
1874
+ "model_module_version": "2.0.0",
1875
+ "model_name": "HTMLModel",
1876
+ "state": {
1877
+ "_dom_classes": [],
1878
+ "_model_module": "@jupyter-widgets/controls",
1879
+ "_model_module_version": "2.0.0",
1880
+ "_model_name": "HTMLModel",
1881
+ "_view_count": null,
1882
+ "_view_module": "@jupyter-widgets/controls",
1883
+ "_view_module_version": "2.0.0",
1884
+ "_view_name": "HTMLView",
1885
+ "description": "",
1886
+ "description_allow_html": false,
1887
+ "layout": "IPY_MODEL_381d045b3faa4d34b4395a11a55cc20d",
1888
+ "placeholder": "​",
1889
+ "style": "IPY_MODEL_8339b596fd0b41bd97798504156c16e3",
1890
+ "tabbable": null,
1891
+ "tooltip": null,
1892
+ "value": " 0/1 [00:00&lt;?, ?it/s]"
1893
+ }
1894
+ },
1895
+ "7274fe84ea2f47438bf993ba8df90729": {
1896
+ "model_module": "@jupyter-widgets/base",
1897
+ "model_module_version": "2.0.0",
1898
+ "model_name": "LayoutModel",
1899
+ "state": {
1900
+ "_model_module": "@jupyter-widgets/base",
1901
+ "_model_module_version": "2.0.0",
1902
+ "_model_name": "LayoutModel",
1903
+ "_view_count": null,
1904
+ "_view_module": "@jupyter-widgets/base",
1905
+ "_view_module_version": "2.0.0",
1906
+ "_view_name": "LayoutView",
1907
+ "align_content": null,
1908
+ "align_items": null,
1909
+ "align_self": null,
1910
+ "border_bottom": null,
1911
+ "border_left": null,
1912
+ "border_right": null,
1913
+ "border_top": null,
1914
+ "bottom": null,
1915
+ "display": null,
1916
+ "flex": null,
1917
+ "flex_flow": null,
1918
+ "grid_area": null,
1919
+ "grid_auto_columns": null,
1920
+ "grid_auto_flow": null,
1921
+ "grid_auto_rows": null,
1922
+ "grid_column": null,
1923
+ "grid_gap": null,
1924
+ "grid_row": null,
1925
+ "grid_template_areas": null,
1926
+ "grid_template_columns": null,
1927
+ "grid_template_rows": null,
1928
+ "height": null,
1929
+ "justify_content": null,
1930
+ "justify_items": null,
1931
+ "left": null,
1932
+ "margin": null,
1933
+ "max_height": null,
1934
+ "max_width": null,
1935
+ "min_height": null,
1936
+ "min_width": null,
1937
+ "object_fit": null,
1938
+ "object_position": null,
1939
+ "order": null,
1940
+ "overflow": null,
1941
+ "padding": null,
1942
+ "right": null,
1943
+ "top": null,
1944
+ "visibility": null,
1945
+ "width": null
1946
+ }
1947
+ },
1948
+ "727f3c07cd8347d88bc17e8eee157568": {
1949
+ "model_module": "@jupyter-widgets/controls",
1950
+ "model_module_version": "2.0.0",
1951
+ "model_name": "HTMLStyleModel",
1952
+ "state": {
1953
+ "_model_module": "@jupyter-widgets/controls",
1954
+ "_model_module_version": "2.0.0",
1955
+ "_model_name": "HTMLStyleModel",
1956
+ "_view_count": null,
1957
+ "_view_module": "@jupyter-widgets/base",
1958
+ "_view_module_version": "2.0.0",
1959
+ "_view_name": "StyleView",
1960
+ "background": null,
1961
+ "description_width": "",
1962
+ "font_size": null,
1963
+ "text_color": null
1964
+ }
1965
+ },
1966
+ "7768a9c4e5cf45e8a288512dce113d05": {
1967
+ "model_module": "@jupyter-widgets/controls",
1968
+ "model_module_version": "2.0.0",
1969
+ "model_name": "HTMLModel",
1970
+ "state": {
1971
+ "_dom_classes": [],
1972
+ "_model_module": "@jupyter-widgets/controls",
1973
+ "_model_module_version": "2.0.0",
1974
+ "_model_name": "HTMLModel",
1975
+ "_view_count": null,
1976
+ "_view_module": "@jupyter-widgets/controls",
1977
+ "_view_module_version": "2.0.0",
1978
+ "_view_name": "HTMLView",
1979
+ "description": "",
1980
+ "description_allow_html": false,
1981
+ "layout": "IPY_MODEL_e20906988def4763b0bc42f36bd8c8f5",
1982
+ "placeholder": "​",
1983
+ "style": "IPY_MODEL_44dbb66e4ff14e97bd956c995d2d8e16",
1984
+ "tabbable": null,
1985
+ "tooltip": null,
1986
+ "value": "Rendering pages:   0%"
1987
+ }
1988
+ },
1989
+ "7ebc9803f2534971995c7ed7eaa4e0b9": {
1990
+ "model_module": "@jupyter-widgets/controls",
1991
+ "model_module_version": "2.0.0",
1992
+ "model_name": "HTMLModel",
1993
+ "state": {
1994
+ "_dom_classes": [],
1995
+ "_model_module": "@jupyter-widgets/controls",
1996
+ "_model_module_version": "2.0.0",
1997
+ "_model_name": "HTMLModel",
1998
+ "_view_count": null,
1999
+ "_view_module": "@jupyter-widgets/controls",
2000
+ "_view_module_version": "2.0.0",
2001
+ "_view_name": "HTMLView",
2002
+ "description": "",
2003
+ "description_allow_html": false,
2004
+ "layout": "IPY_MODEL_efb700ee085d4521a9eaef1cb53a09da",
2005
+ "placeholder": "​",
2006
+ "style": "IPY_MODEL_f549175a52894eee944104536eac6fbb",
2007
+ "tabbable": null,
2008
+ "tooltip": null,
2009
+ "value": " 0/1 [00:00&lt;?, ?it/s]"
2010
+ }
2011
+ },
2012
+ "8339b596fd0b41bd97798504156c16e3": {
2013
+ "model_module": "@jupyter-widgets/controls",
2014
+ "model_module_version": "2.0.0",
2015
+ "model_name": "HTMLStyleModel",
2016
+ "state": {
2017
+ "_model_module": "@jupyter-widgets/controls",
2018
+ "_model_module_version": "2.0.0",
2019
+ "_model_name": "HTMLStyleModel",
2020
+ "_view_count": null,
2021
+ "_view_module": "@jupyter-widgets/base",
2022
+ "_view_module_version": "2.0.0",
2023
+ "_view_name": "StyleView",
2024
+ "background": null,
2025
+ "description_width": "",
2026
+ "font_size": null,
2027
+ "text_color": null
2028
+ }
2029
+ },
2030
+ "8dadb7c8de2c4d098037f3a10cb18482": {
2031
+ "model_module": "@jupyter-widgets/controls",
2032
+ "model_module_version": "2.0.0",
2033
+ "model_name": "ProgressStyleModel",
2034
+ "state": {
2035
+ "_model_module": "@jupyter-widgets/controls",
2036
+ "_model_module_version": "2.0.0",
2037
+ "_model_name": "ProgressStyleModel",
2038
+ "_view_count": null,
2039
+ "_view_module": "@jupyter-widgets/base",
2040
+ "_view_module_version": "2.0.0",
2041
+ "_view_name": "StyleView",
2042
+ "bar_color": null,
2043
+ "description_width": ""
2044
+ }
2045
+ },
2046
+ "909864fb18354f0993ae81cfcd561f0b": {
2047
+ "model_module": "@jupyter-widgets/base",
2048
+ "model_module_version": "2.0.0",
2049
+ "model_name": "LayoutModel",
2050
+ "state": {
2051
+ "_model_module": "@jupyter-widgets/base",
2052
+ "_model_module_version": "2.0.0",
2053
+ "_model_name": "LayoutModel",
2054
+ "_view_count": null,
2055
+ "_view_module": "@jupyter-widgets/base",
2056
+ "_view_module_version": "2.0.0",
2057
+ "_view_name": "LayoutView",
2058
+ "align_content": null,
2059
+ "align_items": null,
2060
+ "align_self": null,
2061
+ "border_bottom": null,
2062
+ "border_left": null,
2063
+ "border_right": null,
2064
+ "border_top": null,
2065
+ "bottom": null,
2066
+ "display": null,
2067
+ "flex": null,
2068
+ "flex_flow": null,
2069
+ "grid_area": null,
2070
+ "grid_auto_columns": null,
2071
+ "grid_auto_flow": null,
2072
+ "grid_auto_rows": null,
2073
+ "grid_column": null,
2074
+ "grid_gap": null,
2075
+ "grid_row": null,
2076
+ "grid_template_areas": null,
2077
+ "grid_template_columns": null,
2078
+ "grid_template_rows": null,
2079
+ "height": null,
2080
+ "justify_content": null,
2081
+ "justify_items": null,
2082
+ "left": null,
2083
+ "margin": null,
2084
+ "max_height": null,
2085
+ "max_width": null,
2086
+ "min_height": null,
2087
+ "min_width": null,
2088
+ "object_fit": null,
2089
+ "object_position": null,
2090
+ "order": null,
2091
+ "overflow": null,
2092
+ "padding": null,
2093
+ "right": null,
2094
+ "top": null,
2095
+ "visibility": null,
2096
+ "width": null
2097
+ }
2098
+ },
2099
+ "91e7e315ad134d3a90c4b4a3e4503f6b": {
2100
+ "model_module": "@jupyter-widgets/controls",
2101
+ "model_module_version": "2.0.0",
2102
+ "model_name": "FloatProgressModel",
2103
+ "state": {
2104
+ "_dom_classes": [],
2105
+ "_model_module": "@jupyter-widgets/controls",
2106
+ "_model_module_version": "2.0.0",
2107
+ "_model_name": "FloatProgressModel",
2108
+ "_view_count": null,
2109
+ "_view_module": "@jupyter-widgets/controls",
2110
+ "_view_module_version": "2.0.0",
2111
+ "_view_name": "ProgressView",
2112
+ "bar_style": "",
2113
+ "description": "",
2114
+ "description_allow_html": false,
2115
+ "layout": "IPY_MODEL_2e8dbacbfbbc4381b0bbd0c0427ee370",
2116
+ "max": 1.0,
2117
+ "min": 0.0,
2118
+ "orientation": "horizontal",
2119
+ "style": "IPY_MODEL_4b113c9d48ef40338ee8c3154d85ad6b",
2120
+ "tabbable": null,
2121
+ "tooltip": null,
2122
+ "value": 1.0
2123
+ }
2124
+ },
2125
+ "9272a282407941daa65b0af13de9a5cb": {
2126
+ "model_module": "@jupyter-widgets/base",
2127
+ "model_module_version": "2.0.0",
2128
+ "model_name": "LayoutModel",
2129
+ "state": {
2130
+ "_model_module": "@jupyter-widgets/base",
2131
+ "_model_module_version": "2.0.0",
2132
+ "_model_name": "LayoutModel",
2133
+ "_view_count": null,
2134
+ "_view_module": "@jupyter-widgets/base",
2135
+ "_view_module_version": "2.0.0",
2136
+ "_view_name": "LayoutView",
2137
+ "align_content": null,
2138
+ "align_items": null,
2139
+ "align_self": null,
2140
+ "border_bottom": null,
2141
+ "border_left": null,
2142
+ "border_right": null,
2143
+ "border_top": null,
2144
+ "bottom": null,
2145
+ "display": null,
2146
+ "flex": null,
2147
+ "flex_flow": null,
2148
+ "grid_area": null,
2149
+ "grid_auto_columns": null,
2150
+ "grid_auto_flow": null,
2151
+ "grid_auto_rows": null,
2152
+ "grid_column": null,
2153
+ "grid_gap": null,
2154
+ "grid_row": null,
2155
+ "grid_template_areas": null,
2156
+ "grid_template_columns": null,
2157
+ "grid_template_rows": null,
2158
+ "height": null,
2159
+ "justify_content": null,
2160
+ "justify_items": null,
2161
+ "left": null,
2162
+ "margin": null,
2163
+ "max_height": null,
2164
+ "max_width": null,
2165
+ "min_height": null,
2166
+ "min_width": null,
2167
+ "object_fit": null,
2168
+ "object_position": null,
2169
+ "order": null,
2170
+ "overflow": null,
2171
+ "padding": null,
2172
+ "right": null,
2173
+ "top": null,
2174
+ "visibility": "hidden",
2175
+ "width": null
2176
+ }
2177
+ },
2178
+ "931980339a8f48ba96beb8eaefd710e2": {
2179
+ "model_module": "@jupyter-widgets/base",
2180
+ "model_module_version": "2.0.0",
2181
+ "model_name": "LayoutModel",
2182
+ "state": {
2183
+ "_model_module": "@jupyter-widgets/base",
2184
+ "_model_module_version": "2.0.0",
2185
+ "_model_name": "LayoutModel",
2186
+ "_view_count": null,
2187
+ "_view_module": "@jupyter-widgets/base",
2188
+ "_view_module_version": "2.0.0",
2189
+ "_view_name": "LayoutView",
2190
+ "align_content": null,
2191
+ "align_items": null,
2192
+ "align_self": null,
2193
+ "border_bottom": null,
2194
+ "border_left": null,
2195
+ "border_right": null,
2196
+ "border_top": null,
2197
+ "bottom": null,
2198
+ "display": null,
2199
+ "flex": null,
2200
+ "flex_flow": null,
2201
+ "grid_area": null,
2202
+ "grid_auto_columns": null,
2203
+ "grid_auto_flow": null,
2204
+ "grid_auto_rows": null,
2205
+ "grid_column": null,
2206
+ "grid_gap": null,
2207
+ "grid_row": null,
2208
+ "grid_template_areas": null,
2209
+ "grid_template_columns": null,
2210
+ "grid_template_rows": null,
2211
+ "height": null,
2212
+ "justify_content": null,
2213
+ "justify_items": null,
2214
+ "left": null,
2215
+ "margin": null,
2216
+ "max_height": null,
2217
+ "max_width": null,
2218
+ "min_height": null,
2219
+ "min_width": null,
2220
+ "object_fit": null,
2221
+ "object_position": null,
2222
+ "order": null,
2223
+ "overflow": null,
2224
+ "padding": null,
2225
+ "right": null,
2226
+ "top": null,
2227
+ "visibility": null,
2228
+ "width": null
2229
+ }
2230
+ },
2231
+ "99d50723f6b9434f8f6eed613f243556": {
2232
+ "model_module": "@jupyter-widgets/controls",
2233
+ "model_module_version": "2.0.0",
2234
+ "model_name": "HTMLModel",
2235
+ "state": {
2236
+ "_dom_classes": [],
2237
+ "_model_module": "@jupyter-widgets/controls",
2238
+ "_model_module_version": "2.0.0",
2239
+ "_model_name": "HTMLModel",
2240
+ "_view_count": null,
2241
+ "_view_module": "@jupyter-widgets/controls",
2242
+ "_view_module_version": "2.0.0",
2243
+ "_view_name": "HTMLView",
2244
+ "description": "",
2245
+ "description_allow_html": false,
2246
+ "layout": "IPY_MODEL_909864fb18354f0993ae81cfcd561f0b",
2247
+ "placeholder": "​",
2248
+ "style": "IPY_MODEL_727f3c07cd8347d88bc17e8eee157568",
2249
+ "tabbable": null,
2250
+ "tooltip": null,
2251
+ "value": "Rendering pages:   0%"
2252
+ }
2253
+ },
2254
+ "9c510bd7f0d540978edbc8661c353210": {
2255
+ "model_module": "@jupyter-widgets/controls",
2256
+ "model_module_version": "2.0.0",
2257
+ "model_name": "FloatProgressModel",
2258
+ "state": {
2259
+ "_dom_classes": [],
2260
+ "_model_module": "@jupyter-widgets/controls",
2261
+ "_model_module_version": "2.0.0",
2262
+ "_model_name": "FloatProgressModel",
2263
+ "_view_count": null,
2264
+ "_view_module": "@jupyter-widgets/controls",
2265
+ "_view_module_version": "2.0.0",
2266
+ "_view_name": "ProgressView",
2267
+ "bar_style": "",
2268
+ "description": "",
2269
+ "description_allow_html": false,
2270
+ "layout": "IPY_MODEL_0e449b943f2d48eb9c9ed2d6bdfdb557",
2271
+ "max": 1.0,
2272
+ "min": 0.0,
2273
+ "orientation": "horizontal",
2274
+ "style": "IPY_MODEL_f161f0e36dd14f4ca732e43e7be3bb1d",
2275
+ "tabbable": null,
2276
+ "tooltip": null,
2277
+ "value": 1.0
2278
+ }
2279
+ },
2280
+ "9dabb33d83a94026b0643537203a019b": {
2281
+ "model_module": "@jupyter-widgets/controls",
2282
+ "model_module_version": "2.0.0",
2283
+ "model_name": "ProgressStyleModel",
2284
+ "state": {
2285
+ "_model_module": "@jupyter-widgets/controls",
2286
+ "_model_module_version": "2.0.0",
2287
+ "_model_name": "ProgressStyleModel",
2288
+ "_view_count": null,
2289
+ "_view_module": "@jupyter-widgets/base",
2290
+ "_view_module_version": "2.0.0",
2291
+ "_view_name": "StyleView",
2292
+ "bar_color": null,
2293
+ "description_width": ""
2294
+ }
2295
+ },
2296
+ "9f0ce84473764eb499e243e45f6c5c82": {
2297
+ "model_module": "@jupyter-widgets/base",
2298
+ "model_module_version": "2.0.0",
2299
+ "model_name": "LayoutModel",
2300
+ "state": {
2301
+ "_model_module": "@jupyter-widgets/base",
2302
+ "_model_module_version": "2.0.0",
2303
+ "_model_name": "LayoutModel",
2304
+ "_view_count": null,
2305
+ "_view_module": "@jupyter-widgets/base",
2306
+ "_view_module_version": "2.0.0",
2307
+ "_view_name": "LayoutView",
2308
+ "align_content": null,
2309
+ "align_items": null,
2310
+ "align_self": null,
2311
+ "border_bottom": null,
2312
+ "border_left": null,
2313
+ "border_right": null,
2314
+ "border_top": null,
2315
+ "bottom": null,
2316
+ "display": null,
2317
+ "flex": null,
2318
+ "flex_flow": null,
2319
+ "grid_area": null,
2320
+ "grid_auto_columns": null,
2321
+ "grid_auto_flow": null,
2322
+ "grid_auto_rows": null,
2323
+ "grid_column": null,
2324
+ "grid_gap": null,
2325
+ "grid_row": null,
2326
+ "grid_template_areas": null,
2327
+ "grid_template_columns": null,
2328
+ "grid_template_rows": null,
2329
+ "height": null,
2330
+ "justify_content": null,
2331
+ "justify_items": null,
2332
+ "left": null,
2333
+ "margin": null,
2334
+ "max_height": null,
2335
+ "max_width": null,
2336
+ "min_height": null,
2337
+ "min_width": null,
2338
+ "object_fit": null,
2339
+ "object_position": null,
2340
+ "order": null,
2341
+ "overflow": null,
2342
+ "padding": null,
2343
+ "right": null,
2344
+ "top": null,
2345
+ "visibility": null,
2346
+ "width": null
2347
+ }
2348
+ },
2349
+ "b32b0f4b5d9147bda684486bdba0dc0d": {
2350
+ "model_module": "@jupyter-widgets/controls",
2351
+ "model_module_version": "2.0.0",
2352
+ "model_name": "HBoxModel",
2353
+ "state": {
2354
+ "_dom_classes": [],
2355
+ "_model_module": "@jupyter-widgets/controls",
2356
+ "_model_module_version": "2.0.0",
2357
+ "_model_name": "HBoxModel",
2358
+ "_view_count": null,
2359
+ "_view_module": "@jupyter-widgets/controls",
2360
+ "_view_module_version": "2.0.0",
2361
+ "_view_name": "HBoxView",
2362
+ "box_style": "",
2363
+ "children": [
2364
+ "IPY_MODEL_dffd4f26c2fc498b9dc5d7810fa659b8",
2365
+ "IPY_MODEL_d397e370fdbf4a1c824cb369dbc79c3e",
2366
+ "IPY_MODEL_fde4c9c9bb6a423cb21eac39810c63d4"
2367
+ ],
2368
+ "layout": "IPY_MODEL_9272a282407941daa65b0af13de9a5cb",
2369
+ "tabbable": null,
2370
+ "tooltip": null
2371
+ }
2372
+ },
2373
+ "b35dccbf46ae4fd8a7e0cadfbfbfd626": {
2374
+ "model_module": "@jupyter-widgets/controls",
2375
+ "model_module_version": "2.0.0",
2376
+ "model_name": "ProgressStyleModel",
2377
+ "state": {
2378
+ "_model_module": "@jupyter-widgets/controls",
2379
+ "_model_module_version": "2.0.0",
2380
+ "_model_name": "ProgressStyleModel",
2381
+ "_view_count": null,
2382
+ "_view_module": "@jupyter-widgets/base",
2383
+ "_view_module_version": "2.0.0",
2384
+ "_view_name": "StyleView",
2385
+ "bar_color": null,
2386
+ "description_width": ""
2387
+ }
2388
+ },
2389
+ "b3df63c262144182b094f3c1cc3ccf2c": {
2390
+ "model_module": "@jupyter-widgets/base",
2391
+ "model_module_version": "2.0.0",
2392
+ "model_name": "LayoutModel",
2393
+ "state": {
2394
+ "_model_module": "@jupyter-widgets/base",
2395
+ "_model_module_version": "2.0.0",
2396
+ "_model_name": "LayoutModel",
2397
+ "_view_count": null,
2398
+ "_view_module": "@jupyter-widgets/base",
2399
+ "_view_module_version": "2.0.0",
2400
+ "_view_name": "LayoutView",
2401
+ "align_content": null,
2402
+ "align_items": null,
2403
+ "align_self": null,
2404
+ "border_bottom": null,
2405
+ "border_left": null,
2406
+ "border_right": null,
2407
+ "border_top": null,
2408
+ "bottom": null,
2409
+ "display": null,
2410
+ "flex": null,
2411
+ "flex_flow": null,
2412
+ "grid_area": null,
2413
+ "grid_auto_columns": null,
2414
+ "grid_auto_flow": null,
2415
+ "grid_auto_rows": null,
2416
+ "grid_column": null,
2417
+ "grid_gap": null,
2418
+ "grid_row": null,
2419
+ "grid_template_areas": null,
2420
+ "grid_template_columns": null,
2421
+ "grid_template_rows": null,
2422
+ "height": null,
2423
+ "justify_content": null,
2424
+ "justify_items": null,
2425
+ "left": null,
2426
+ "margin": null,
2427
+ "max_height": null,
2428
+ "max_width": null,
2429
+ "min_height": null,
2430
+ "min_width": null,
2431
+ "object_fit": null,
2432
+ "object_position": null,
2433
+ "order": null,
2434
+ "overflow": null,
2435
+ "padding": null,
2436
+ "right": null,
2437
+ "top": null,
2438
+ "visibility": "hidden",
2439
+ "width": null
2440
+ }
2441
+ },
2442
+ "b4d9791457264b7cb43659168e0cc56b": {
2443
+ "model_module": "@jupyter-widgets/controls",
2444
+ "model_module_version": "2.0.0",
2445
+ "model_name": "HTMLModel",
2446
+ "state": {
2447
+ "_dom_classes": [],
2448
+ "_model_module": "@jupyter-widgets/controls",
2449
+ "_model_module_version": "2.0.0",
2450
+ "_model_name": "HTMLModel",
2451
+ "_view_count": null,
2452
+ "_view_module": "@jupyter-widgets/controls",
2453
+ "_view_module_version": "2.0.0",
2454
+ "_view_name": "HTMLView",
2455
+ "description": "",
2456
+ "description_allow_html": false,
2457
+ "layout": "IPY_MODEL_d0defeb9757a449d8c8e16805610aa5a",
2458
+ "placeholder": "​",
2459
+ "style": "IPY_MODEL_d35c4148ceaa4af9b247beb26b5a5203",
2460
+ "tabbable": null,
2461
+ "tooltip": null,
2462
+ "value": " 0/1 [00:00&lt;?, ?it/s]"
2463
+ }
2464
+ },
2465
+ "c084e0a81dc0489e907da2ee73c66d1d": {
2466
+ "model_module": "@jupyter-widgets/controls",
2467
+ "model_module_version": "2.0.0",
2468
+ "model_name": "HTMLModel",
2469
+ "state": {
2470
+ "_dom_classes": [],
2471
+ "_model_module": "@jupyter-widgets/controls",
2472
+ "_model_module_version": "2.0.0",
2473
+ "_model_name": "HTMLModel",
2474
+ "_view_count": null,
2475
+ "_view_module": "@jupyter-widgets/controls",
2476
+ "_view_module_version": "2.0.0",
2477
+ "_view_name": "HTMLView",
2478
+ "description": "",
2479
+ "description_allow_html": false,
2480
+ "layout": "IPY_MODEL_d6eaf6e1b1144c87bf8eeddac2b5d289",
2481
+ "placeholder": "​",
2482
+ "style": "IPY_MODEL_0a9ffb62a42d49ccbb896041f568e18c",
2483
+ "tabbable": null,
2484
+ "tooltip": null,
2485
+ "value": "Rendering pages:   0%"
2486
+ }
2487
+ },
2488
+ "cb0ee18881d6417da77d80b8fdc760e6": {
2489
+ "model_module": "@jupyter-widgets/controls",
2490
+ "model_module_version": "2.0.0",
2491
+ "model_name": "HTMLStyleModel",
2492
+ "state": {
2493
+ "_model_module": "@jupyter-widgets/controls",
2494
+ "_model_module_version": "2.0.0",
2495
+ "_model_name": "HTMLStyleModel",
2496
+ "_view_count": null,
2497
+ "_view_module": "@jupyter-widgets/base",
2498
+ "_view_module_version": "2.0.0",
2499
+ "_view_name": "StyleView",
2500
+ "background": null,
2501
+ "description_width": "",
2502
+ "font_size": null,
2503
+ "text_color": null
2504
+ }
2505
+ },
2506
+ "cfbf78084dd04ad0b51e62e5b1bd0e14": {
2507
+ "model_module": "@jupyter-widgets/controls",
2508
+ "model_module_version": "2.0.0",
2509
+ "model_name": "HBoxModel",
2510
+ "state": {
2511
+ "_dom_classes": [],
2512
+ "_model_module": "@jupyter-widgets/controls",
2513
+ "_model_module_version": "2.0.0",
2514
+ "_model_name": "HBoxModel",
2515
+ "_view_count": null,
2516
+ "_view_module": "@jupyter-widgets/controls",
2517
+ "_view_module_version": "2.0.0",
2518
+ "_view_name": "HBoxView",
2519
+ "box_style": "",
2520
+ "children": [
2521
+ "IPY_MODEL_7768a9c4e5cf45e8a288512dce113d05",
2522
+ "IPY_MODEL_9c510bd7f0d540978edbc8661c353210",
2523
+ "IPY_MODEL_6fb12657e4a94a6482078b9217db42cd"
2524
+ ],
2525
+ "layout": "IPY_MODEL_d4bfe18022b94934a5c9ad87d5734e1b",
2526
+ "tabbable": null,
2527
+ "tooltip": null
2528
+ }
2529
+ },
2530
+ "d014137ed4b64afb85291d1ff72422b7": {
2531
+ "model_module": "@jupyter-widgets/controls",
2532
+ "model_module_version": "2.0.0",
2533
+ "model_name": "HTMLModel",
2534
+ "state": {
2535
+ "_dom_classes": [],
2536
+ "_model_module": "@jupyter-widgets/controls",
2537
+ "_model_module_version": "2.0.0",
2538
+ "_model_name": "HTMLModel",
2539
+ "_view_count": null,
2540
+ "_view_module": "@jupyter-widgets/controls",
2541
+ "_view_module_version": "2.0.0",
2542
+ "_view_name": "HTMLView",
2543
+ "description": "",
2544
+ "description_allow_html": false,
2545
+ "layout": "IPY_MODEL_d566d9e945e947de87ae4230db490af4",
2546
+ "placeholder": "​",
2547
+ "style": "IPY_MODEL_560328fa0add43ac9de66469923fe4c5",
2548
+ "tabbable": null,
2549
+ "tooltip": null,
2550
+ "value": "Rendering pages:   0%"
2551
+ }
2552
+ },
2553
+ "d0defeb9757a449d8c8e16805610aa5a": {
2554
+ "model_module": "@jupyter-widgets/base",
2555
+ "model_module_version": "2.0.0",
2556
+ "model_name": "LayoutModel",
2557
+ "state": {
2558
+ "_model_module": "@jupyter-widgets/base",
2559
+ "_model_module_version": "2.0.0",
2560
+ "_model_name": "LayoutModel",
2561
+ "_view_count": null,
2562
+ "_view_module": "@jupyter-widgets/base",
2563
+ "_view_module_version": "2.0.0",
2564
+ "_view_name": "LayoutView",
2565
+ "align_content": null,
2566
+ "align_items": null,
2567
+ "align_self": null,
2568
+ "border_bottom": null,
2569
+ "border_left": null,
2570
+ "border_right": null,
2571
+ "border_top": null,
2572
+ "bottom": null,
2573
+ "display": null,
2574
+ "flex": null,
2575
+ "flex_flow": null,
2576
+ "grid_area": null,
2577
+ "grid_auto_columns": null,
2578
+ "grid_auto_flow": null,
2579
+ "grid_auto_rows": null,
2580
+ "grid_column": null,
2581
+ "grid_gap": null,
2582
+ "grid_row": null,
2583
+ "grid_template_areas": null,
2584
+ "grid_template_columns": null,
2585
+ "grid_template_rows": null,
2586
+ "height": null,
2587
+ "justify_content": null,
2588
+ "justify_items": null,
2589
+ "left": null,
2590
+ "margin": null,
2591
+ "max_height": null,
2592
+ "max_width": null,
2593
+ "min_height": null,
2594
+ "min_width": null,
2595
+ "object_fit": null,
2596
+ "object_position": null,
2597
+ "order": null,
2598
+ "overflow": null,
2599
+ "padding": null,
2600
+ "right": null,
2601
+ "top": null,
2602
+ "visibility": null,
2603
+ "width": null
2604
+ }
2605
+ },
2606
+ "d2110f1f50234e218c6650fb9855d71e": {
2607
+ "model_module": "@jupyter-widgets/base",
2608
+ "model_module_version": "2.0.0",
2609
+ "model_name": "LayoutModel",
2610
+ "state": {
2611
+ "_model_module": "@jupyter-widgets/base",
2612
+ "_model_module_version": "2.0.0",
2613
+ "_model_name": "LayoutModel",
2614
+ "_view_count": null,
2615
+ "_view_module": "@jupyter-widgets/base",
2616
+ "_view_module_version": "2.0.0",
2617
+ "_view_name": "LayoutView",
2618
+ "align_content": null,
2619
+ "align_items": null,
2620
+ "align_self": null,
2621
+ "border_bottom": null,
2622
+ "border_left": null,
2623
+ "border_right": null,
2624
+ "border_top": null,
2625
+ "bottom": null,
2626
+ "display": null,
2627
+ "flex": null,
2628
+ "flex_flow": null,
2629
+ "grid_area": null,
2630
+ "grid_auto_columns": null,
2631
+ "grid_auto_flow": null,
2632
+ "grid_auto_rows": null,
2633
+ "grid_column": null,
2634
+ "grid_gap": null,
2635
+ "grid_row": null,
2636
+ "grid_template_areas": null,
2637
+ "grid_template_columns": null,
2638
+ "grid_template_rows": null,
2639
+ "height": null,
2640
+ "justify_content": null,
2641
+ "justify_items": null,
2642
+ "left": null,
2643
+ "margin": null,
2644
+ "max_height": null,
2645
+ "max_width": null,
2646
+ "min_height": null,
2647
+ "min_width": null,
2648
+ "object_fit": null,
2649
+ "object_position": null,
2650
+ "order": null,
2651
+ "overflow": null,
2652
+ "padding": null,
2653
+ "right": null,
2654
+ "top": null,
2655
+ "visibility": null,
2656
+ "width": null
2657
+ }
2658
+ },
2659
+ "d21cbf7b9387402dbd7554e8d5e0c076": {
2660
+ "model_module": "@jupyter-widgets/controls",
2661
+ "model_module_version": "2.0.0",
2662
+ "model_name": "HTMLStyleModel",
2663
+ "state": {
2664
+ "_model_module": "@jupyter-widgets/controls",
2665
+ "_model_module_version": "2.0.0",
2666
+ "_model_name": "HTMLStyleModel",
2667
+ "_view_count": null,
2668
+ "_view_module": "@jupyter-widgets/base",
2669
+ "_view_module_version": "2.0.0",
2670
+ "_view_name": "StyleView",
2671
+ "background": null,
2672
+ "description_width": "",
2673
+ "font_size": null,
2674
+ "text_color": null
2675
+ }
2676
+ },
2677
+ "d24ba4f60b62452d971a7eac3bac2a10": {
2678
+ "model_module": "@jupyter-widgets/base",
2679
+ "model_module_version": "2.0.0",
2680
+ "model_name": "LayoutModel",
2681
+ "state": {
2682
+ "_model_module": "@jupyter-widgets/base",
2683
+ "_model_module_version": "2.0.0",
2684
+ "_model_name": "LayoutModel",
2685
+ "_view_count": null,
2686
+ "_view_module": "@jupyter-widgets/base",
2687
+ "_view_module_version": "2.0.0",
2688
+ "_view_name": "LayoutView",
2689
+ "align_content": null,
2690
+ "align_items": null,
2691
+ "align_self": null,
2692
+ "border_bottom": null,
2693
+ "border_left": null,
2694
+ "border_right": null,
2695
+ "border_top": null,
2696
+ "bottom": null,
2697
+ "display": null,
2698
+ "flex": null,
2699
+ "flex_flow": null,
2700
+ "grid_area": null,
2701
+ "grid_auto_columns": null,
2702
+ "grid_auto_flow": null,
2703
+ "grid_auto_rows": null,
2704
+ "grid_column": null,
2705
+ "grid_gap": null,
2706
+ "grid_row": null,
2707
+ "grid_template_areas": null,
2708
+ "grid_template_columns": null,
2709
+ "grid_template_rows": null,
2710
+ "height": null,
2711
+ "justify_content": null,
2712
+ "justify_items": null,
2713
+ "left": null,
2714
+ "margin": null,
2715
+ "max_height": null,
2716
+ "max_width": null,
2717
+ "min_height": null,
2718
+ "min_width": null,
2719
+ "object_fit": null,
2720
+ "object_position": null,
2721
+ "order": null,
2722
+ "overflow": null,
2723
+ "padding": null,
2724
+ "right": null,
2725
+ "top": null,
2726
+ "visibility": null,
2727
+ "width": null
2728
+ }
2729
+ },
2730
+ "d35c4148ceaa4af9b247beb26b5a5203": {
2731
+ "model_module": "@jupyter-widgets/controls",
2732
+ "model_module_version": "2.0.0",
2733
+ "model_name": "HTMLStyleModel",
2734
+ "state": {
2735
+ "_model_module": "@jupyter-widgets/controls",
2736
+ "_model_module_version": "2.0.0",
2737
+ "_model_name": "HTMLStyleModel",
2738
+ "_view_count": null,
2739
+ "_view_module": "@jupyter-widgets/base",
2740
+ "_view_module_version": "2.0.0",
2741
+ "_view_name": "StyleView",
2742
+ "background": null,
2743
+ "description_width": "",
2744
+ "font_size": null,
2745
+ "text_color": null
2746
+ }
2747
+ },
2748
+ "d397e370fdbf4a1c824cb369dbc79c3e": {
2749
+ "model_module": "@jupyter-widgets/controls",
2750
+ "model_module_version": "2.0.0",
2751
+ "model_name": "FloatProgressModel",
2752
+ "state": {
2753
+ "_dom_classes": [],
2754
+ "_model_module": "@jupyter-widgets/controls",
2755
+ "_model_module_version": "2.0.0",
2756
+ "_model_name": "FloatProgressModel",
2757
+ "_view_count": null,
2758
+ "_view_module": "@jupyter-widgets/controls",
2759
+ "_view_module_version": "2.0.0",
2760
+ "_view_name": "ProgressView",
2761
+ "bar_style": "",
2762
+ "description": "",
2763
+ "description_allow_html": false,
2764
+ "layout": "IPY_MODEL_9f0ce84473764eb499e243e45f6c5c82",
2765
+ "max": 1.0,
2766
+ "min": 0.0,
2767
+ "orientation": "horizontal",
2768
+ "style": "IPY_MODEL_5c4525f9c8664981bd3680eed1cf154d",
2769
+ "tabbable": null,
2770
+ "tooltip": null,
2771
+ "value": 1.0
2772
+ }
2773
+ },
2774
+ "d4bfe18022b94934a5c9ad87d5734e1b": {
2775
+ "model_module": "@jupyter-widgets/base",
2776
+ "model_module_version": "2.0.0",
2777
+ "model_name": "LayoutModel",
2778
+ "state": {
2779
+ "_model_module": "@jupyter-widgets/base",
2780
+ "_model_module_version": "2.0.0",
2781
+ "_model_name": "LayoutModel",
2782
+ "_view_count": null,
2783
+ "_view_module": "@jupyter-widgets/base",
2784
+ "_view_module_version": "2.0.0",
2785
+ "_view_name": "LayoutView",
2786
+ "align_content": null,
2787
+ "align_items": null,
2788
+ "align_self": null,
2789
+ "border_bottom": null,
2790
+ "border_left": null,
2791
+ "border_right": null,
2792
+ "border_top": null,
2793
+ "bottom": null,
2794
+ "display": null,
2795
+ "flex": null,
2796
+ "flex_flow": null,
2797
+ "grid_area": null,
2798
+ "grid_auto_columns": null,
2799
+ "grid_auto_flow": null,
2800
+ "grid_auto_rows": null,
2801
+ "grid_column": null,
2802
+ "grid_gap": null,
2803
+ "grid_row": null,
2804
+ "grid_template_areas": null,
2805
+ "grid_template_columns": null,
2806
+ "grid_template_rows": null,
2807
+ "height": null,
2808
+ "justify_content": null,
2809
+ "justify_items": null,
2810
+ "left": null,
2811
+ "margin": null,
2812
+ "max_height": null,
2813
+ "max_width": null,
2814
+ "min_height": null,
2815
+ "min_width": null,
2816
+ "object_fit": null,
2817
+ "object_position": null,
2818
+ "order": null,
2819
+ "overflow": null,
2820
+ "padding": null,
2821
+ "right": null,
2822
+ "top": null,
2823
+ "visibility": "hidden",
2824
+ "width": null
2825
+ }
2826
+ },
2827
+ "d566d9e945e947de87ae4230db490af4": {
2828
+ "model_module": "@jupyter-widgets/base",
2829
+ "model_module_version": "2.0.0",
2830
+ "model_name": "LayoutModel",
2831
+ "state": {
2832
+ "_model_module": "@jupyter-widgets/base",
2833
+ "_model_module_version": "2.0.0",
2834
+ "_model_name": "LayoutModel",
2835
+ "_view_count": null,
2836
+ "_view_module": "@jupyter-widgets/base",
2837
+ "_view_module_version": "2.0.0",
2838
+ "_view_name": "LayoutView",
2839
+ "align_content": null,
2840
+ "align_items": null,
2841
+ "align_self": null,
2842
+ "border_bottom": null,
2843
+ "border_left": null,
2844
+ "border_right": null,
2845
+ "border_top": null,
2846
+ "bottom": null,
2847
+ "display": null,
2848
+ "flex": null,
2849
+ "flex_flow": null,
2850
+ "grid_area": null,
2851
+ "grid_auto_columns": null,
2852
+ "grid_auto_flow": null,
2853
+ "grid_auto_rows": null,
2854
+ "grid_column": null,
2855
+ "grid_gap": null,
2856
+ "grid_row": null,
2857
+ "grid_template_areas": null,
2858
+ "grid_template_columns": null,
2859
+ "grid_template_rows": null,
2860
+ "height": null,
2861
+ "justify_content": null,
2862
+ "justify_items": null,
2863
+ "left": null,
2864
+ "margin": null,
2865
+ "max_height": null,
2866
+ "max_width": null,
2867
+ "min_height": null,
2868
+ "min_width": null,
2869
+ "object_fit": null,
2870
+ "object_position": null,
2871
+ "order": null,
2872
+ "overflow": null,
2873
+ "padding": null,
2874
+ "right": null,
2875
+ "top": null,
2876
+ "visibility": null,
2877
+ "width": null
2878
+ }
2879
+ },
2880
+ "d5990c7975564d3297f7a3e8fb120269": {
2881
+ "model_module": "@jupyter-widgets/controls",
2882
+ "model_module_version": "2.0.0",
2883
+ "model_name": "HTMLModel",
2884
+ "state": {
2885
+ "_dom_classes": [],
2886
+ "_model_module": "@jupyter-widgets/controls",
2887
+ "_model_module_version": "2.0.0",
2888
+ "_model_name": "HTMLModel",
2889
+ "_view_count": null,
2890
+ "_view_module": "@jupyter-widgets/controls",
2891
+ "_view_module_version": "2.0.0",
2892
+ "_view_name": "HTMLView",
2893
+ "description": "",
2894
+ "description_allow_html": false,
2895
+ "layout": "IPY_MODEL_5e0e050cf7bc45219d785a3ab1f10bec",
2896
+ "placeholder": "​",
2897
+ "style": "IPY_MODEL_d21cbf7b9387402dbd7554e8d5e0c076",
2898
+ "tabbable": null,
2899
+ "tooltip": null,
2900
+ "value": "Rendering pages:   0%"
2901
+ }
2902
+ },
2903
+ "d6eaf6e1b1144c87bf8eeddac2b5d289": {
2904
+ "model_module": "@jupyter-widgets/base",
2905
+ "model_module_version": "2.0.0",
2906
+ "model_name": "LayoutModel",
2907
+ "state": {
2908
+ "_model_module": "@jupyter-widgets/base",
2909
+ "_model_module_version": "2.0.0",
2910
+ "_model_name": "LayoutModel",
2911
+ "_view_count": null,
2912
+ "_view_module": "@jupyter-widgets/base",
2913
+ "_view_module_version": "2.0.0",
2914
+ "_view_name": "LayoutView",
2915
+ "align_content": null,
2916
+ "align_items": null,
2917
+ "align_self": null,
2918
+ "border_bottom": null,
2919
+ "border_left": null,
2920
+ "border_right": null,
2921
+ "border_top": null,
2922
+ "bottom": null,
2923
+ "display": null,
2924
+ "flex": null,
2925
+ "flex_flow": null,
2926
+ "grid_area": null,
2927
+ "grid_auto_columns": null,
2928
+ "grid_auto_flow": null,
2929
+ "grid_auto_rows": null,
2930
+ "grid_column": null,
2931
+ "grid_gap": null,
2932
+ "grid_row": null,
2933
+ "grid_template_areas": null,
2934
+ "grid_template_columns": null,
2935
+ "grid_template_rows": null,
2936
+ "height": null,
2937
+ "justify_content": null,
2938
+ "justify_items": null,
2939
+ "left": null,
2940
+ "margin": null,
2941
+ "max_height": null,
2942
+ "max_width": null,
2943
+ "min_height": null,
2944
+ "min_width": null,
2945
+ "object_fit": null,
2946
+ "object_position": null,
2947
+ "order": null,
2948
+ "overflow": null,
2949
+ "padding": null,
2950
+ "right": null,
2951
+ "top": null,
2952
+ "visibility": null,
2953
+ "width": null
2954
+ }
2955
+ },
2956
+ "d88c24f8b2ae4215b8d77d01ef7f6a02": {
2957
+ "model_module": "@jupyter-widgets/base",
2958
+ "model_module_version": "2.0.0",
2959
+ "model_name": "LayoutModel",
2960
+ "state": {
2961
+ "_model_module": "@jupyter-widgets/base",
2962
+ "_model_module_version": "2.0.0",
2963
+ "_model_name": "LayoutModel",
2964
+ "_view_count": null,
2965
+ "_view_module": "@jupyter-widgets/base",
2966
+ "_view_module_version": "2.0.0",
2967
+ "_view_name": "LayoutView",
2968
+ "align_content": null,
2969
+ "align_items": null,
2970
+ "align_self": null,
2971
+ "border_bottom": null,
2972
+ "border_left": null,
2973
+ "border_right": null,
2974
+ "border_top": null,
2975
+ "bottom": null,
2976
+ "display": null,
2977
+ "flex": null,
2978
+ "flex_flow": null,
2979
+ "grid_area": null,
2980
+ "grid_auto_columns": null,
2981
+ "grid_auto_flow": null,
2982
+ "grid_auto_rows": null,
2983
+ "grid_column": null,
2984
+ "grid_gap": null,
2985
+ "grid_row": null,
2986
+ "grid_template_areas": null,
2987
+ "grid_template_columns": null,
2988
+ "grid_template_rows": null,
2989
+ "height": null,
2990
+ "justify_content": null,
2991
+ "justify_items": null,
2992
+ "left": null,
2993
+ "margin": null,
2994
+ "max_height": null,
2995
+ "max_width": null,
2996
+ "min_height": null,
2997
+ "min_width": null,
2998
+ "object_fit": null,
2999
+ "object_position": null,
3000
+ "order": null,
3001
+ "overflow": null,
3002
+ "padding": null,
3003
+ "right": null,
3004
+ "top": null,
3005
+ "visibility": null,
3006
+ "width": null
3007
+ }
3008
+ },
3009
+ "d8adc464cd1c4e29b0b5c729c9c4b2d8": {
3010
+ "model_module": "@jupyter-widgets/controls",
3011
+ "model_module_version": "2.0.0",
3012
+ "model_name": "HTMLModel",
3013
+ "state": {
3014
+ "_dom_classes": [],
3015
+ "_model_module": "@jupyter-widgets/controls",
3016
+ "_model_module_version": "2.0.0",
3017
+ "_model_name": "HTMLModel",
3018
+ "_view_count": null,
3019
+ "_view_module": "@jupyter-widgets/controls",
3020
+ "_view_module_version": "2.0.0",
3021
+ "_view_name": "HTMLView",
3022
+ "description": "",
3023
+ "description_allow_html": false,
3024
+ "layout": "IPY_MODEL_d88c24f8b2ae4215b8d77d01ef7f6a02",
3025
+ "placeholder": "​",
3026
+ "style": "IPY_MODEL_51463ffe39094a4c86e60da0f11cdbdd",
3027
+ "tabbable": null,
3028
+ "tooltip": null,
3029
+ "value": " 0/1 [00:00&lt;?, ?it/s]"
3030
+ }
3031
+ },
3032
+ "d8eaec32e2ab41eb9fe771d526296e97": {
3033
+ "model_module": "@jupyter-widgets/controls",
3034
+ "model_module_version": "2.0.0",
3035
+ "model_name": "HTMLModel",
3036
+ "state": {
3037
+ "_dom_classes": [],
3038
+ "_model_module": "@jupyter-widgets/controls",
3039
+ "_model_module_version": "2.0.0",
3040
+ "_model_name": "HTMLModel",
3041
+ "_view_count": null,
3042
+ "_view_module": "@jupyter-widgets/controls",
3043
+ "_view_module_version": "2.0.0",
3044
+ "_view_name": "HTMLView",
3045
+ "description": "",
3046
+ "description_allow_html": false,
3047
+ "layout": "IPY_MODEL_d24ba4f60b62452d971a7eac3bac2a10",
3048
+ "placeholder": "​",
3049
+ "style": "IPY_MODEL_cb0ee18881d6417da77d80b8fdc760e6",
3050
+ "tabbable": null,
3051
+ "tooltip": null,
3052
+ "value": "Rendering pages:   0%"
3053
+ }
3054
+ },
3055
+ "dffd4f26c2fc498b9dc5d7810fa659b8": {
3056
+ "model_module": "@jupyter-widgets/controls",
3057
+ "model_module_version": "2.0.0",
3058
+ "model_name": "HTMLModel",
3059
+ "state": {
3060
+ "_dom_classes": [],
3061
+ "_model_module": "@jupyter-widgets/controls",
3062
+ "_model_module_version": "2.0.0",
3063
+ "_model_name": "HTMLModel",
3064
+ "_view_count": null,
3065
+ "_view_module": "@jupyter-widgets/controls",
3066
+ "_view_module_version": "2.0.0",
3067
+ "_view_name": "HTMLView",
3068
+ "description": "",
3069
+ "description_allow_html": false,
3070
+ "layout": "IPY_MODEL_022a289a001e4e499b5d629364ec3cf2",
3071
+ "placeholder": "​",
3072
+ "style": "IPY_MODEL_6f810f1c862f4d388328c29487bef3b7",
3073
+ "tabbable": null,
3074
+ "tooltip": null,
3075
+ "value": "Rendering pages:   0%"
3076
+ }
3077
+ },
3078
+ "e092a03a328f4899b2b43c5b4e3695af": {
3079
+ "model_module": "@jupyter-widgets/controls",
3080
+ "model_module_version": "2.0.0",
3081
+ "model_name": "HTMLStyleModel",
3082
+ "state": {
3083
+ "_model_module": "@jupyter-widgets/controls",
3084
+ "_model_module_version": "2.0.0",
3085
+ "_model_name": "HTMLStyleModel",
3086
+ "_view_count": null,
3087
+ "_view_module": "@jupyter-widgets/base",
3088
+ "_view_module_version": "2.0.0",
3089
+ "_view_name": "StyleView",
3090
+ "background": null,
3091
+ "description_width": "",
3092
+ "font_size": null,
3093
+ "text_color": null
3094
+ }
3095
+ },
3096
+ "e20906988def4763b0bc42f36bd8c8f5": {
3097
+ "model_module": "@jupyter-widgets/base",
3098
+ "model_module_version": "2.0.0",
3099
+ "model_name": "LayoutModel",
3100
+ "state": {
3101
+ "_model_module": "@jupyter-widgets/base",
3102
+ "_model_module_version": "2.0.0",
3103
+ "_model_name": "LayoutModel",
3104
+ "_view_count": null,
3105
+ "_view_module": "@jupyter-widgets/base",
3106
+ "_view_module_version": "2.0.0",
3107
+ "_view_name": "LayoutView",
3108
+ "align_content": null,
3109
+ "align_items": null,
3110
+ "align_self": null,
3111
+ "border_bottom": null,
3112
+ "border_left": null,
3113
+ "border_right": null,
3114
+ "border_top": null,
3115
+ "bottom": null,
3116
+ "display": null,
3117
+ "flex": null,
3118
+ "flex_flow": null,
3119
+ "grid_area": null,
3120
+ "grid_auto_columns": null,
3121
+ "grid_auto_flow": null,
3122
+ "grid_auto_rows": null,
3123
+ "grid_column": null,
3124
+ "grid_gap": null,
3125
+ "grid_row": null,
3126
+ "grid_template_areas": null,
3127
+ "grid_template_columns": null,
3128
+ "grid_template_rows": null,
3129
+ "height": null,
3130
+ "justify_content": null,
3131
+ "justify_items": null,
3132
+ "left": null,
3133
+ "margin": null,
3134
+ "max_height": null,
3135
+ "max_width": null,
3136
+ "min_height": null,
3137
+ "min_width": null,
3138
+ "object_fit": null,
3139
+ "object_position": null,
3140
+ "order": null,
3141
+ "overflow": null,
3142
+ "padding": null,
3143
+ "right": null,
3144
+ "top": null,
3145
+ "visibility": null,
3146
+ "width": null
3147
+ }
3148
+ },
3149
+ "e637d6c0df174f33917c5a207172fa2b": {
3150
+ "model_module": "@jupyter-widgets/base",
3151
+ "model_module_version": "2.0.0",
3152
+ "model_name": "LayoutModel",
3153
+ "state": {
3154
+ "_model_module": "@jupyter-widgets/base",
3155
+ "_model_module_version": "2.0.0",
3156
+ "_model_name": "LayoutModel",
3157
+ "_view_count": null,
3158
+ "_view_module": "@jupyter-widgets/base",
3159
+ "_view_module_version": "2.0.0",
3160
+ "_view_name": "LayoutView",
3161
+ "align_content": null,
3162
+ "align_items": null,
3163
+ "align_self": null,
3164
+ "border_bottom": null,
3165
+ "border_left": null,
3166
+ "border_right": null,
3167
+ "border_top": null,
3168
+ "bottom": null,
3169
+ "display": null,
3170
+ "flex": null,
3171
+ "flex_flow": null,
3172
+ "grid_area": null,
3173
+ "grid_auto_columns": null,
3174
+ "grid_auto_flow": null,
3175
+ "grid_auto_rows": null,
3176
+ "grid_column": null,
3177
+ "grid_gap": null,
3178
+ "grid_row": null,
3179
+ "grid_template_areas": null,
3180
+ "grid_template_columns": null,
3181
+ "grid_template_rows": null,
3182
+ "height": null,
3183
+ "justify_content": null,
3184
+ "justify_items": null,
3185
+ "left": null,
3186
+ "margin": null,
3187
+ "max_height": null,
3188
+ "max_width": null,
3189
+ "min_height": null,
3190
+ "min_width": null,
3191
+ "object_fit": null,
3192
+ "object_position": null,
3193
+ "order": null,
3194
+ "overflow": null,
3195
+ "padding": null,
3196
+ "right": null,
3197
+ "top": null,
3198
+ "visibility": null,
3199
+ "width": null
3200
+ }
3201
+ },
3202
+ "e773fda450244e40b095ec4b21b899ae": {
3203
+ "model_module": "@jupyter-widgets/controls",
3204
+ "model_module_version": "2.0.0",
3205
+ "model_name": "HTMLModel",
3206
+ "state": {
3207
+ "_dom_classes": [],
3208
+ "_model_module": "@jupyter-widgets/controls",
3209
+ "_model_module_version": "2.0.0",
3210
+ "_model_name": "HTMLModel",
3211
+ "_view_count": null,
3212
+ "_view_module": "@jupyter-widgets/controls",
3213
+ "_view_module_version": "2.0.0",
3214
+ "_view_name": "HTMLView",
3215
+ "description": "",
3216
+ "description_allow_html": false,
3217
+ "layout": "IPY_MODEL_f30ded3cd5624dd88aed17301ca327df",
3218
+ "placeholder": "​",
3219
+ "style": "IPY_MODEL_6096733735134f0f8af21b42caf09301",
3220
+ "tabbable": null,
3221
+ "tooltip": null,
3222
+ "value": " 0/1 [00:00&lt;?, ?it/s]"
3223
+ }
3224
+ },
3225
+ "e87383909abf42d6960724773b6e90f7": {
3226
+ "model_module": "@jupyter-widgets/controls",
3227
+ "model_module_version": "2.0.0",
3228
+ "model_name": "HTMLStyleModel",
3229
+ "state": {
3230
+ "_model_module": "@jupyter-widgets/controls",
3231
+ "_model_module_version": "2.0.0",
3232
+ "_model_name": "HTMLStyleModel",
3233
+ "_view_count": null,
3234
+ "_view_module": "@jupyter-widgets/base",
3235
+ "_view_module_version": "2.0.0",
3236
+ "_view_name": "StyleView",
3237
+ "background": null,
3238
+ "description_width": "",
3239
+ "font_size": null,
3240
+ "text_color": null
3241
+ }
3242
+ },
3243
+ "e985937b7e3b4e6196ebb516634b6d3f": {
3244
+ "model_module": "@jupyter-widgets/base",
3245
+ "model_module_version": "2.0.0",
3246
+ "model_name": "LayoutModel",
3247
+ "state": {
3248
+ "_model_module": "@jupyter-widgets/base",
3249
+ "_model_module_version": "2.0.0",
3250
+ "_model_name": "LayoutModel",
3251
+ "_view_count": null,
3252
+ "_view_module": "@jupyter-widgets/base",
3253
+ "_view_module_version": "2.0.0",
3254
+ "_view_name": "LayoutView",
3255
+ "align_content": null,
3256
+ "align_items": null,
3257
+ "align_self": null,
3258
+ "border_bottom": null,
3259
+ "border_left": null,
3260
+ "border_right": null,
3261
+ "border_top": null,
3262
+ "bottom": null,
3263
+ "display": null,
3264
+ "flex": null,
3265
+ "flex_flow": null,
3266
+ "grid_area": null,
3267
+ "grid_auto_columns": null,
3268
+ "grid_auto_flow": null,
3269
+ "grid_auto_rows": null,
3270
+ "grid_column": null,
3271
+ "grid_gap": null,
3272
+ "grid_row": null,
3273
+ "grid_template_areas": null,
3274
+ "grid_template_columns": null,
3275
+ "grid_template_rows": null,
3276
+ "height": null,
3277
+ "justify_content": null,
3278
+ "justify_items": null,
3279
+ "left": null,
3280
+ "margin": null,
3281
+ "max_height": null,
3282
+ "max_width": null,
3283
+ "min_height": null,
3284
+ "min_width": null,
3285
+ "object_fit": null,
3286
+ "object_position": null,
3287
+ "order": null,
3288
+ "overflow": null,
3289
+ "padding": null,
3290
+ "right": null,
3291
+ "top": null,
3292
+ "visibility": null,
3293
+ "width": null
3294
+ }
3295
+ },
3296
+ "ea2ea03faf7c431eaed13f8e9de9b088": {
3297
+ "model_module": "@jupyter-widgets/controls",
3298
+ "model_module_version": "2.0.0",
3299
+ "model_name": "HBoxModel",
3300
+ "state": {
3301
+ "_dom_classes": [],
3302
+ "_model_module": "@jupyter-widgets/controls",
3303
+ "_model_module_version": "2.0.0",
3304
+ "_model_name": "HBoxModel",
3305
+ "_view_count": null,
3306
+ "_view_module": "@jupyter-widgets/controls",
3307
+ "_view_module_version": "2.0.0",
3308
+ "_view_name": "HBoxView",
3309
+ "box_style": "",
3310
+ "children": [
3311
+ "IPY_MODEL_d5990c7975564d3297f7a3e8fb120269",
3312
+ "IPY_MODEL_fc0447553b6449a7bf9a6a4949152d53",
3313
+ "IPY_MODEL_e773fda450244e40b095ec4b21b899ae"
3314
+ ],
3315
+ "layout": "IPY_MODEL_b3df63c262144182b094f3c1cc3ccf2c",
3316
+ "tabbable": null,
3317
+ "tooltip": null
3318
+ }
3319
+ },
3320
+ "eec78714154b4bf6aca4b7ccb3c157d1": {
3321
+ "model_module": "@jupyter-widgets/controls",
3322
+ "model_module_version": "2.0.0",
3323
+ "model_name": "HBoxModel",
3324
+ "state": {
3325
+ "_dom_classes": [],
3326
+ "_model_module": "@jupyter-widgets/controls",
3327
+ "_model_module_version": "2.0.0",
3328
+ "_model_name": "HBoxModel",
3329
+ "_view_count": null,
3330
+ "_view_module": "@jupyter-widgets/controls",
3331
+ "_view_module_version": "2.0.0",
3332
+ "_view_name": "HBoxView",
3333
+ "box_style": "",
3334
+ "children": [
3335
+ "IPY_MODEL_d8eaec32e2ab41eb9fe771d526296e97",
3336
+ "IPY_MODEL_1c18d689ebb846d4908b0ad1e05838e5",
3337
+ "IPY_MODEL_b4d9791457264b7cb43659168e0cc56b"
3338
+ ],
3339
+ "layout": "IPY_MODEL_6475f425e96b4974a7f1fd4dc7445314",
3340
+ "tabbable": null,
3341
+ "tooltip": null
3342
+ }
3343
+ },
3344
+ "eee280b518cb4bd7b9ee15666753bb55": {
3345
+ "model_module": "@jupyter-widgets/controls",
3346
+ "model_module_version": "2.0.0",
3347
+ "model_name": "HBoxModel",
3348
+ "state": {
3349
+ "_dom_classes": [],
3350
+ "_model_module": "@jupyter-widgets/controls",
3351
+ "_model_module_version": "2.0.0",
3352
+ "_model_name": "HBoxModel",
3353
+ "_view_count": null,
3354
+ "_view_module": "@jupyter-widgets/controls",
3355
+ "_view_module_version": "2.0.0",
3356
+ "_view_name": "HBoxView",
3357
+ "box_style": "",
3358
+ "children": [
3359
+ "IPY_MODEL_d014137ed4b64afb85291d1ff72422b7",
3360
+ "IPY_MODEL_91e7e315ad134d3a90c4b4a3e4503f6b",
3361
+ "IPY_MODEL_4e25f956fccc4b1b969fbaa7f290492a"
3362
+ ],
3363
+ "layout": "IPY_MODEL_f44b616b4083436da47aa948408d7012",
3364
+ "tabbable": null,
3365
+ "tooltip": null
3366
+ }
3367
+ },
3368
+ "ef1cff32aed04ed89901e962c397b3ef": {
3369
+ "model_module": "@jupyter-widgets/controls",
3370
+ "model_module_version": "2.0.0",
3371
+ "model_name": "HTMLModel",
3372
+ "state": {
3373
+ "_dom_classes": [],
3374
+ "_model_module": "@jupyter-widgets/controls",
3375
+ "_model_module_version": "2.0.0",
3376
+ "_model_name": "HTMLModel",
3377
+ "_view_count": null,
3378
+ "_view_module": "@jupyter-widgets/controls",
3379
+ "_view_module_version": "2.0.0",
3380
+ "_view_name": "HTMLView",
3381
+ "description": "",
3382
+ "description_allow_html": false,
3383
+ "layout": "IPY_MODEL_7274fe84ea2f47438bf993ba8df90729",
3384
+ "placeholder": "​",
3385
+ "style": "IPY_MODEL_fa1160b822694815984f5d3a771bcc15",
3386
+ "tabbable": null,
3387
+ "tooltip": null,
3388
+ "value": "Rendering pages:   0%"
3389
+ }
3390
+ },
3391
+ "efb700ee085d4521a9eaef1cb53a09da": {
3392
+ "model_module": "@jupyter-widgets/base",
3393
+ "model_module_version": "2.0.0",
3394
+ "model_name": "LayoutModel",
3395
+ "state": {
3396
+ "_model_module": "@jupyter-widgets/base",
3397
+ "_model_module_version": "2.0.0",
3398
+ "_model_name": "LayoutModel",
3399
+ "_view_count": null,
3400
+ "_view_module": "@jupyter-widgets/base",
3401
+ "_view_module_version": "2.0.0",
3402
+ "_view_name": "LayoutView",
3403
+ "align_content": null,
3404
+ "align_items": null,
3405
+ "align_self": null,
3406
+ "border_bottom": null,
3407
+ "border_left": null,
3408
+ "border_right": null,
3409
+ "border_top": null,
3410
+ "bottom": null,
3411
+ "display": null,
3412
+ "flex": null,
3413
+ "flex_flow": null,
3414
+ "grid_area": null,
3415
+ "grid_auto_columns": null,
3416
+ "grid_auto_flow": null,
3417
+ "grid_auto_rows": null,
3418
+ "grid_column": null,
3419
+ "grid_gap": null,
3420
+ "grid_row": null,
3421
+ "grid_template_areas": null,
3422
+ "grid_template_columns": null,
3423
+ "grid_template_rows": null,
3424
+ "height": null,
3425
+ "justify_content": null,
3426
+ "justify_items": null,
3427
+ "left": null,
3428
+ "margin": null,
3429
+ "max_height": null,
3430
+ "max_width": null,
3431
+ "min_height": null,
3432
+ "min_width": null,
3433
+ "object_fit": null,
3434
+ "object_position": null,
3435
+ "order": null,
3436
+ "overflow": null,
3437
+ "padding": null,
3438
+ "right": null,
3439
+ "top": null,
3440
+ "visibility": null,
3441
+ "width": null
3442
+ }
3443
+ },
3444
+ "f161f0e36dd14f4ca732e43e7be3bb1d": {
3445
+ "model_module": "@jupyter-widgets/controls",
3446
+ "model_module_version": "2.0.0",
3447
+ "model_name": "ProgressStyleModel",
3448
+ "state": {
3449
+ "_model_module": "@jupyter-widgets/controls",
3450
+ "_model_module_version": "2.0.0",
3451
+ "_model_name": "ProgressStyleModel",
3452
+ "_view_count": null,
3453
+ "_view_module": "@jupyter-widgets/base",
3454
+ "_view_module_version": "2.0.0",
3455
+ "_view_name": "StyleView",
3456
+ "bar_color": null,
3457
+ "description_width": ""
3458
+ }
3459
+ },
3460
+ "f30ded3cd5624dd88aed17301ca327df": {
3461
+ "model_module": "@jupyter-widgets/base",
3462
+ "model_module_version": "2.0.0",
3463
+ "model_name": "LayoutModel",
3464
+ "state": {
3465
+ "_model_module": "@jupyter-widgets/base",
3466
+ "_model_module_version": "2.0.0",
3467
+ "_model_name": "LayoutModel",
3468
+ "_view_count": null,
3469
+ "_view_module": "@jupyter-widgets/base",
3470
+ "_view_module_version": "2.0.0",
3471
+ "_view_name": "LayoutView",
3472
+ "align_content": null,
3473
+ "align_items": null,
3474
+ "align_self": null,
3475
+ "border_bottom": null,
3476
+ "border_left": null,
3477
+ "border_right": null,
3478
+ "border_top": null,
3479
+ "bottom": null,
3480
+ "display": null,
3481
+ "flex": null,
3482
+ "flex_flow": null,
3483
+ "grid_area": null,
3484
+ "grid_auto_columns": null,
3485
+ "grid_auto_flow": null,
3486
+ "grid_auto_rows": null,
3487
+ "grid_column": null,
3488
+ "grid_gap": null,
3489
+ "grid_row": null,
3490
+ "grid_template_areas": null,
3491
+ "grid_template_columns": null,
3492
+ "grid_template_rows": null,
3493
+ "height": null,
3494
+ "justify_content": null,
3495
+ "justify_items": null,
3496
+ "left": null,
3497
+ "margin": null,
3498
+ "max_height": null,
3499
+ "max_width": null,
3500
+ "min_height": null,
3501
+ "min_width": null,
3502
+ "object_fit": null,
3503
+ "object_position": null,
3504
+ "order": null,
3505
+ "overflow": null,
3506
+ "padding": null,
3507
+ "right": null,
3508
+ "top": null,
3509
+ "visibility": null,
3510
+ "width": null
3511
+ }
3512
+ },
3513
+ "f44b616b4083436da47aa948408d7012": {
3514
+ "model_module": "@jupyter-widgets/base",
3515
+ "model_module_version": "2.0.0",
3516
+ "model_name": "LayoutModel",
3517
+ "state": {
3518
+ "_model_module": "@jupyter-widgets/base",
3519
+ "_model_module_version": "2.0.0",
3520
+ "_model_name": "LayoutModel",
3521
+ "_view_count": null,
3522
+ "_view_module": "@jupyter-widgets/base",
3523
+ "_view_module_version": "2.0.0",
3524
+ "_view_name": "LayoutView",
3525
+ "align_content": null,
3526
+ "align_items": null,
3527
+ "align_self": null,
3528
+ "border_bottom": null,
3529
+ "border_left": null,
3530
+ "border_right": null,
3531
+ "border_top": null,
3532
+ "bottom": null,
3533
+ "display": null,
3534
+ "flex": null,
3535
+ "flex_flow": null,
3536
+ "grid_area": null,
3537
+ "grid_auto_columns": null,
3538
+ "grid_auto_flow": null,
3539
+ "grid_auto_rows": null,
3540
+ "grid_column": null,
3541
+ "grid_gap": null,
3542
+ "grid_row": null,
3543
+ "grid_template_areas": null,
3544
+ "grid_template_columns": null,
3545
+ "grid_template_rows": null,
3546
+ "height": null,
3547
+ "justify_content": null,
3548
+ "justify_items": null,
3549
+ "left": null,
3550
+ "margin": null,
3551
+ "max_height": null,
3552
+ "max_width": null,
3553
+ "min_height": null,
3554
+ "min_width": null,
3555
+ "object_fit": null,
3556
+ "object_position": null,
3557
+ "order": null,
3558
+ "overflow": null,
3559
+ "padding": null,
3560
+ "right": null,
3561
+ "top": null,
3562
+ "visibility": "hidden",
3563
+ "width": null
3564
+ }
3565
+ },
3566
+ "f549175a52894eee944104536eac6fbb": {
3567
+ "model_module": "@jupyter-widgets/controls",
3568
+ "model_module_version": "2.0.0",
3569
+ "model_name": "HTMLStyleModel",
3570
+ "state": {
3571
+ "_model_module": "@jupyter-widgets/controls",
3572
+ "_model_module_version": "2.0.0",
3573
+ "_model_name": "HTMLStyleModel",
3574
+ "_view_count": null,
3575
+ "_view_module": "@jupyter-widgets/base",
3576
+ "_view_module_version": "2.0.0",
3577
+ "_view_name": "StyleView",
3578
+ "background": null,
3579
+ "description_width": "",
3580
+ "font_size": null,
3581
+ "text_color": null
3582
+ }
3583
+ },
3584
+ "fa1160b822694815984f5d3a771bcc15": {
3585
+ "model_module": "@jupyter-widgets/controls",
3586
+ "model_module_version": "2.0.0",
3587
+ "model_name": "HTMLStyleModel",
3588
+ "state": {
3589
+ "_model_module": "@jupyter-widgets/controls",
3590
+ "_model_module_version": "2.0.0",
3591
+ "_model_name": "HTMLStyleModel",
3592
+ "_view_count": null,
3593
+ "_view_module": "@jupyter-widgets/base",
3594
+ "_view_module_version": "2.0.0",
3595
+ "_view_name": "StyleView",
3596
+ "background": null,
3597
+ "description_width": "",
3598
+ "font_size": null,
3599
+ "text_color": null
3600
+ }
3601
+ },
3602
+ "fc0447553b6449a7bf9a6a4949152d53": {
3603
+ "model_module": "@jupyter-widgets/controls",
3604
+ "model_module_version": "2.0.0",
3605
+ "model_name": "FloatProgressModel",
3606
+ "state": {
3607
+ "_dom_classes": [],
3608
+ "_model_module": "@jupyter-widgets/controls",
3609
+ "_model_module_version": "2.0.0",
3610
+ "_model_name": "FloatProgressModel",
3611
+ "_view_count": null,
3612
+ "_view_module": "@jupyter-widgets/controls",
3613
+ "_view_module_version": "2.0.0",
3614
+ "_view_name": "ProgressView",
3615
+ "bar_style": "",
3616
+ "description": "",
3617
+ "description_allow_html": false,
3618
+ "layout": "IPY_MODEL_931980339a8f48ba96beb8eaefd710e2",
3619
+ "max": 1.0,
3620
+ "min": 0.0,
3621
+ "orientation": "horizontal",
3622
+ "style": "IPY_MODEL_2d77ac4edf924b9d88b97929227b2497",
3623
+ "tabbable": null,
3624
+ "tooltip": null,
3625
+ "value": 1.0
3626
+ }
3627
+ },
3628
+ "fc795c53429c43328aceb24e9478dbf6": {
3629
+ "model_module": "@jupyter-widgets/base",
3630
+ "model_module_version": "2.0.0",
3631
+ "model_name": "LayoutModel",
3632
+ "state": {
3633
+ "_model_module": "@jupyter-widgets/base",
3634
+ "_model_module_version": "2.0.0",
3635
+ "_model_name": "LayoutModel",
3636
+ "_view_count": null,
3637
+ "_view_module": "@jupyter-widgets/base",
3638
+ "_view_module_version": "2.0.0",
3639
+ "_view_name": "LayoutView",
3640
+ "align_content": null,
3641
+ "align_items": null,
3642
+ "align_self": null,
3643
+ "border_bottom": null,
3644
+ "border_left": null,
3645
+ "border_right": null,
3646
+ "border_top": null,
3647
+ "bottom": null,
3648
+ "display": null,
3649
+ "flex": null,
3650
+ "flex_flow": null,
3651
+ "grid_area": null,
3652
+ "grid_auto_columns": null,
3653
+ "grid_auto_flow": null,
3654
+ "grid_auto_rows": null,
3655
+ "grid_column": null,
3656
+ "grid_gap": null,
3657
+ "grid_row": null,
3658
+ "grid_template_areas": null,
3659
+ "grid_template_columns": null,
3660
+ "grid_template_rows": null,
3661
+ "height": null,
3662
+ "justify_content": null,
3663
+ "justify_items": null,
3664
+ "left": null,
3665
+ "margin": null,
3666
+ "max_height": null,
3667
+ "max_width": null,
3668
+ "min_height": null,
3669
+ "min_width": null,
3670
+ "object_fit": null,
3671
+ "object_position": null,
3672
+ "order": null,
3673
+ "overflow": null,
3674
+ "padding": null,
3675
+ "right": null,
3676
+ "top": null,
3677
+ "visibility": "hidden",
3678
+ "width": null
3679
+ }
3680
+ },
3681
+ "fde4c9c9bb6a423cb21eac39810c63d4": {
3682
+ "model_module": "@jupyter-widgets/controls",
3683
+ "model_module_version": "2.0.0",
3684
+ "model_name": "HTMLModel",
3685
+ "state": {
3686
+ "_dom_classes": [],
3687
+ "_model_module": "@jupyter-widgets/controls",
3688
+ "_model_module_version": "2.0.0",
3689
+ "_model_name": "HTMLModel",
3690
+ "_view_count": null,
3691
+ "_view_module": "@jupyter-widgets/controls",
3692
+ "_view_module_version": "2.0.0",
3693
+ "_view_name": "HTMLView",
3694
+ "description": "",
3695
+ "description_allow_html": false,
3696
+ "layout": "IPY_MODEL_d2110f1f50234e218c6650fb9855d71e",
3697
+ "placeholder": "​",
3698
+ "style": "IPY_MODEL_e87383909abf42d6960724773b6e90f7",
3699
+ "tabbable": null,
3700
+ "tooltip": null,
3701
+ "value": " 0/1 [00:00&lt;?, ?it/s]"
3702
+ }
3703
+ }
3704
+ },
3705
+ "version_major": 2,
3706
+ "version_minor": 0
3707
+ }
600
3708
  }
601
3709
  },
602
3710
  "nbformat": 4,