natural-pdf 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. docs/api/index.md +386 -0
  2. docs/assets/favicon.png +3 -0
  3. docs/assets/favicon.svg +3 -0
  4. docs/assets/javascripts/custom.js +17 -0
  5. docs/assets/logo.svg +3 -0
  6. docs/assets/sample-screen.png +0 -0
  7. docs/assets/social-preview.png +17 -0
  8. docs/assets/social-preview.svg +17 -0
  9. docs/assets/stylesheets/custom.css +65 -0
  10. docs/document-qa/index.ipynb +435 -0
  11. docs/document-qa/index.md +79 -0
  12. docs/element-selection/index.ipynb +915 -0
  13. docs/element-selection/index.md +229 -0
  14. docs/index.md +170 -0
  15. docs/installation/index.md +69 -0
  16. docs/interactive-widget/index.ipynb +962 -0
  17. docs/interactive-widget/index.md +12 -0
  18. docs/layout-analysis/index.ipynb +818 -0
  19. docs/layout-analysis/index.md +185 -0
  20. docs/ocr/index.md +209 -0
  21. docs/pdf-navigation/index.ipynb +314 -0
  22. docs/pdf-navigation/index.md +97 -0
  23. docs/regions/index.ipynb +816 -0
  24. docs/regions/index.md +294 -0
  25. docs/tables/index.ipynb +658 -0
  26. docs/tables/index.md +144 -0
  27. docs/text-analysis/index.ipynb +370 -0
  28. docs/text-analysis/index.md +105 -0
  29. docs/text-extraction/index.ipynb +1478 -0
  30. docs/text-extraction/index.md +292 -0
  31. docs/tutorials/01-loading-and-extraction.ipynb +1710 -0
  32. docs/tutorials/01-loading-and-extraction.md +95 -0
  33. docs/tutorials/02-finding-elements.ipynb +340 -0
  34. docs/tutorials/02-finding-elements.md +149 -0
  35. docs/tutorials/03-extracting-blocks.ipynb +147 -0
  36. docs/tutorials/03-extracting-blocks.md +48 -0
  37. docs/tutorials/04-table-extraction.ipynb +114 -0
  38. docs/tutorials/04-table-extraction.md +50 -0
  39. docs/tutorials/05-excluding-content.ipynb +270 -0
  40. docs/tutorials/05-excluding-content.md +109 -0
  41. docs/tutorials/06-document-qa.ipynb +332 -0
  42. docs/tutorials/06-document-qa.md +91 -0
  43. docs/tutorials/07-layout-analysis.ipynb +288 -0
  44. docs/tutorials/07-layout-analysis.md +66 -0
  45. docs/tutorials/07-working-with-regions.ipynb +413 -0
  46. docs/tutorials/07-working-with-regions.md +151 -0
  47. docs/tutorials/08-spatial-navigation.ipynb +508 -0
  48. docs/tutorials/08-spatial-navigation.md +190 -0
  49. docs/tutorials/09-section-extraction.ipynb +2434 -0
  50. docs/tutorials/09-section-extraction.md +256 -0
  51. docs/tutorials/10-form-field-extraction.ipynb +512 -0
  52. docs/tutorials/10-form-field-extraction.md +201 -0
  53. docs/tutorials/11-enhanced-table-processing.ipynb +54 -0
  54. docs/tutorials/11-enhanced-table-processing.md +9 -0
  55. docs/tutorials/12-ocr-integration.ipynb +604 -0
  56. docs/tutorials/12-ocr-integration.md +175 -0
  57. docs/tutorials/13-semantic-search.ipynb +1328 -0
  58. docs/tutorials/13-semantic-search.md +77 -0
  59. docs/visual-debugging/index.ipynb +2970 -0
  60. docs/visual-debugging/index.md +157 -0
  61. docs/visual-debugging/region.png +0 -0
  62. natural_pdf/__init__.py +50 -33
  63. natural_pdf/analyzers/__init__.py +2 -1
  64. natural_pdf/analyzers/layout/base.py +32 -24
  65. natural_pdf/analyzers/layout/docling.py +131 -72
  66. natural_pdf/analyzers/layout/gemini.py +264 -0
  67. natural_pdf/analyzers/layout/layout_analyzer.py +156 -113
  68. natural_pdf/analyzers/layout/layout_manager.py +125 -58
  69. natural_pdf/analyzers/layout/layout_options.py +43 -17
  70. natural_pdf/analyzers/layout/paddle.py +152 -95
  71. natural_pdf/analyzers/layout/surya.py +164 -92
  72. natural_pdf/analyzers/layout/tatr.py +149 -84
  73. natural_pdf/analyzers/layout/yolo.py +89 -45
  74. natural_pdf/analyzers/text_options.py +22 -15
  75. natural_pdf/analyzers/text_structure.py +131 -85
  76. natural_pdf/analyzers/utils.py +30 -23
  77. natural_pdf/collections/pdf_collection.py +146 -97
  78. natural_pdf/core/__init__.py +1 -1
  79. natural_pdf/core/element_manager.py +419 -337
  80. natural_pdf/core/highlighting_service.py +268 -196
  81. natural_pdf/core/page.py +1044 -521
  82. natural_pdf/core/pdf.py +516 -313
  83. natural_pdf/elements/__init__.py +1 -1
  84. natural_pdf/elements/base.py +307 -225
  85. natural_pdf/elements/collections.py +805 -543
  86. natural_pdf/elements/line.py +39 -36
  87. natural_pdf/elements/rect.py +32 -30
  88. natural_pdf/elements/region.py +889 -879
  89. natural_pdf/elements/text.py +127 -99
  90. natural_pdf/exporters/__init__.py +0 -1
  91. natural_pdf/exporters/searchable_pdf.py +261 -102
  92. natural_pdf/ocr/__init__.py +57 -35
  93. natural_pdf/ocr/engine.py +150 -46
  94. natural_pdf/ocr/engine_easyocr.py +146 -150
  95. natural_pdf/ocr/engine_paddle.py +118 -175
  96. natural_pdf/ocr/engine_surya.py +78 -141
  97. natural_pdf/ocr/ocr_factory.py +114 -0
  98. natural_pdf/ocr/ocr_manager.py +122 -124
  99. natural_pdf/ocr/ocr_options.py +16 -20
  100. natural_pdf/ocr/utils.py +98 -0
  101. natural_pdf/qa/__init__.py +1 -1
  102. natural_pdf/qa/document_qa.py +119 -111
  103. natural_pdf/search/__init__.py +37 -31
  104. natural_pdf/search/haystack_search_service.py +312 -189
  105. natural_pdf/search/haystack_utils.py +186 -122
  106. natural_pdf/search/search_options.py +25 -14
  107. natural_pdf/search/search_service_protocol.py +12 -6
  108. natural_pdf/search/searchable_mixin.py +261 -176
  109. natural_pdf/selectors/__init__.py +2 -1
  110. natural_pdf/selectors/parser.py +159 -316
  111. natural_pdf/templates/__init__.py +1 -1
  112. natural_pdf/templates/spa/css/style.css +334 -0
  113. natural_pdf/templates/spa/index.html +31 -0
  114. natural_pdf/templates/spa/js/app.js +472 -0
  115. natural_pdf/templates/spa/words.txt +235976 -0
  116. natural_pdf/utils/debug.py +32 -0
  117. natural_pdf/utils/highlighting.py +8 -2
  118. natural_pdf/utils/identifiers.py +29 -0
  119. natural_pdf/utils/packaging.py +418 -0
  120. natural_pdf/utils/reading_order.py +65 -63
  121. natural_pdf/utils/text_extraction.py +195 -0
  122. natural_pdf/utils/visualization.py +70 -61
  123. natural_pdf/widgets/__init__.py +2 -3
  124. natural_pdf/widgets/viewer.py +749 -718
  125. {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/METADATA +53 -17
  126. natural_pdf-0.1.6.dist-info/RECORD +141 -0
  127. {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/WHEEL +1 -1
  128. natural_pdf-0.1.6.dist-info/top_level.txt +4 -0
  129. notebooks/Examples.ipynb +1293 -0
  130. pdfs/.gitkeep +0 -0
  131. pdfs/01-practice.pdf +543 -0
  132. pdfs/0500000US42001.pdf +0 -0
  133. pdfs/0500000US42007.pdf +0 -0
  134. pdfs/2014 Statistics.pdf +0 -0
  135. pdfs/2019 Statistics.pdf +0 -0
  136. pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
  137. pdfs/needs-ocr.pdf +0 -0
  138. natural_pdf/templates/ocr_debug.html +0 -517
  139. natural_pdf-0.1.4.dist-info/RECORD +0 -61
  140. natural_pdf-0.1.4.dist-info/top_level.txt +0 -1
  141. {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1328 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "6ba4c324",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Semantic Search Across Multiple Documents\n",
9
+ "\n",
10
+ "When working with a collection of PDFs, you might need to find information relevant to a specific query across all documents, not just within a single one. This tutorial demonstrates how to perform semantic search over a `PDFCollection`."
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 1,
16
+ "id": "573f47ae",
17
+ "metadata": {
18
+ "execution": {
19
+ "iopub.execute_input": "2025-04-21T21:26:26.425833Z",
20
+ "iopub.status.busy": "2025-04-21T21:26:26.425672Z",
21
+ "iopub.status.idle": "2025-04-21T21:26:26.430590Z",
22
+ "shell.execute_reply": "2025-04-21T21:26:26.430219Z"
23
+ }
24
+ },
25
+ "outputs": [],
26
+ "source": [
27
+ "#%pip install \"natural-pdf[all]\"\n",
28
+ "#%pip install \"natural-pdf[search]\" # Ensure search dependencies are installed"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": 2,
34
+ "id": "f40c7516",
35
+ "metadata": {
36
+ "execution": {
37
+ "iopub.execute_input": "2025-04-21T21:26:26.432199Z",
38
+ "iopub.status.busy": "2025-04-21T21:26:26.432080Z",
39
+ "iopub.status.idle": "2025-04-21T21:26:33.413144Z",
40
+ "shell.execute_reply": "2025-04-21T21:26:33.412658Z"
41
+ }
42
+ },
43
+ "outputs": [
44
+ {
45
+ "name": "stderr",
46
+ "output_type": "stream",
47
+ "text": [
48
+ "natural_pdf.collections.pdf_collection - INFO - Initializing 2 PDF objects...\n"
49
+ ]
50
+ },
51
+ {
52
+ "name": "stderr",
53
+ "output_type": "stream",
54
+ "text": [
55
+ "\r",
56
+ "Loading PDFs: 0%| | 0/2 [00:00<?, ?it/s]"
57
+ ]
58
+ },
59
+ {
60
+ "name": "stderr",
61
+ "output_type": "stream",
62
+ "text": [
63
+ "natural_pdf.core.pdf - INFO - Downloading PDF from URL: https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/01-practice.pdf\n"
64
+ ]
65
+ },
66
+ {
67
+ "name": "stderr",
68
+ "output_type": "stream",
69
+ "text": [
70
+ "natural_pdf.core.pdf - INFO - PDF downloaded to temporary file: /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp0qxzzh1m.pdf\n"
71
+ ]
72
+ },
73
+ {
74
+ "name": "stderr",
75
+ "output_type": "stream",
76
+ "text": [
77
+ "natural_pdf.core.pdf - INFO - Initializing PDF from /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp0qxzzh1m.pdf\n"
78
+ ]
79
+ },
80
+ {
81
+ "name": "stderr",
82
+ "output_type": "stream",
83
+ "text": [
84
+ "natural_pdf.ocr.ocr_manager - INFO - OCRManager initialized.\n"
85
+ ]
86
+ },
87
+ {
88
+ "name": "stderr",
89
+ "output_type": "stream",
90
+ "text": [
91
+ "natural_pdf.analyzers.layout.layout_manager - INFO - LayoutManager initialized. Available engines: ['yolo', 'tatr', 'paddle', 'surya', 'docling', 'gemini']\n"
92
+ ]
93
+ },
94
+ {
95
+ "name": "stderr",
96
+ "output_type": "stream",
97
+ "text": [
98
+ "natural_pdf.core.highlighting_service - INFO - HighlightingService initialized with ColorManager.\n"
99
+ ]
100
+ },
101
+ {
102
+ "name": "stderr",
103
+ "output_type": "stream",
104
+ "text": [
105
+ "natural_pdf.core.pdf - INFO - Initialized HighlightingService.\n"
106
+ ]
107
+ },
108
+ {
109
+ "name": "stderr",
110
+ "output_type": "stream",
111
+ "text": [
112
+ "natural_pdf.core.pdf - INFO - PDF 'https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/01-practice.pdf' initialized with 1 pages.\n"
113
+ ]
114
+ },
115
+ {
116
+ "name": "stderr",
117
+ "output_type": "stream",
118
+ "text": [
119
+ "\r",
120
+ "Loading PDFs: 50%|█████████████████████████████████████████▌ | 1/2 [00:00<00:00, 6.99it/s]"
121
+ ]
122
+ },
123
+ {
124
+ "name": "stderr",
125
+ "output_type": "stream",
126
+ "text": [
127
+ "natural_pdf.core.pdf - INFO - Downloading PDF from URL: https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/Atlanta_Public_Schools_GA_sample.pdf\n"
128
+ ]
129
+ },
130
+ {
131
+ "name": "stderr",
132
+ "output_type": "stream",
133
+ "text": [
134
+ "natural_pdf.core.pdf - INFO - PDF downloaded to temporary file: /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp67aw1giy.pdf\n"
135
+ ]
136
+ },
137
+ {
138
+ "name": "stderr",
139
+ "output_type": "stream",
140
+ "text": [
141
+ "natural_pdf.core.pdf - INFO - Initializing PDF from /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp67aw1giy.pdf\n"
142
+ ]
143
+ },
144
+ {
145
+ "name": "stderr",
146
+ "output_type": "stream",
147
+ "text": [
148
+ "natural_pdf.ocr.ocr_manager - INFO - OCRManager initialized.\n"
149
+ ]
150
+ },
151
+ {
152
+ "name": "stderr",
153
+ "output_type": "stream",
154
+ "text": [
155
+ "natural_pdf.analyzers.layout.layout_manager - INFO - LayoutManager initialized. Available engines: ['yolo', 'tatr', 'paddle', 'surya', 'docling', 'gemini']\n"
156
+ ]
157
+ },
158
+ {
159
+ "name": "stderr",
160
+ "output_type": "stream",
161
+ "text": [
162
+ "natural_pdf.core.highlighting_service - INFO - HighlightingService initialized with ColorManager.\n"
163
+ ]
164
+ },
165
+ {
166
+ "name": "stderr",
167
+ "output_type": "stream",
168
+ "text": [
169
+ "natural_pdf.core.pdf - INFO - Initialized HighlightingService.\n"
170
+ ]
171
+ },
172
+ {
173
+ "name": "stderr",
174
+ "output_type": "stream",
175
+ "text": [
176
+ "natural_pdf.core.pdf - INFO - PDF 'https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/Atlanta_Public_Schools_GA_sample.pdf' initialized with 5 pages.\n"
177
+ ]
178
+ },
179
+ {
180
+ "name": "stderr",
181
+ "output_type": "stream",
182
+ "text": [
183
+ "\r",
184
+ "Loading PDFs: 100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 8.19it/s]"
185
+ ]
186
+ },
187
+ {
188
+ "name": "stderr",
189
+ "output_type": "stream",
190
+ "text": [
191
+ "\r",
192
+ "Loading PDFs: 100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 7.97it/s]"
193
+ ]
194
+ },
195
+ {
196
+ "name": "stderr",
197
+ "output_type": "stream",
198
+ "text": [
199
+ "\n",
200
+ "natural_pdf.collections.pdf_collection - INFO - Successfully initialized 2 PDFs. Failed: 0\n"
201
+ ]
202
+ },
203
+ {
204
+ "name": "stdout",
205
+ "output_type": "stream",
206
+ "text": [
207
+ "Created collection with 2 PDFs.\n"
208
+ ]
209
+ }
210
+ ],
211
+ "source": [
212
+ "import logging\n",
213
+ "import natural_pdf\n",
214
+ "\n",
215
+ "# Optional: Configure logging to see progress\n",
216
+ "natural_pdf.configure_logging(level=logging.INFO)\n",
217
+ "\n",
218
+ "# Define the paths to your PDF files\n",
219
+ "pdf_paths = [\n",
220
+ " \"https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/01-practice.pdf\",\n",
221
+ " \"https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/Atlanta_Public_Schools_GA_sample.pdf\"\n",
222
+ " # Add more PDF paths as needed\n",
223
+ "]\n",
224
+ "\n",
225
+ "# Create a PDFCollection\n",
226
+ "collection = natural_pdf.PDFCollection(pdf_paths)\n",
227
+ "print(f\"Created collection with {len(collection.pdfs)} PDFs.\")"
228
+ ]
229
+ },
230
+ {
231
+ "cell_type": "markdown",
232
+ "id": "4137d1ea",
233
+ "metadata": {},
234
+ "source": [
235
+ "## Initializing the Search Index\n",
236
+ "\n",
237
+ "Before performing a search, you need to initialize the search capabilities for the collection. This involves processing the documents and building an index."
238
+ ]
239
+ },
240
+ {
241
+ "cell_type": "code",
242
+ "execution_count": 3,
243
+ "id": "6b57f754",
244
+ "metadata": {
245
+ "execution": {
246
+ "iopub.execute_input": "2025-04-21T21:26:33.416364Z",
247
+ "iopub.status.busy": "2025-04-21T21:26:33.415349Z",
248
+ "iopub.status.idle": "2025-04-21T21:26:36.015751Z",
249
+ "shell.execute_reply": "2025-04-21T21:26:36.015429Z"
250
+ }
251
+ },
252
+ "outputs": [
253
+ {
254
+ "name": "stderr",
255
+ "output_type": "stream",
256
+ "text": [
257
+ "natural_pdf.search.searchable_mixin - INFO - Using default collection name 'default_collection' for in-memory service.\n"
258
+ ]
259
+ },
260
+ {
261
+ "name": "stderr",
262
+ "output_type": "stream",
263
+ "text": [
264
+ "natural_pdf.search.searchable_mixin - INFO - Creating new SearchService: name='default_collection', persist=False, model=default\n"
265
+ ]
266
+ },
267
+ {
268
+ "name": "stderr",
269
+ "output_type": "stream",
270
+ "text": [
271
+ "natural_pdf.search.haystack_search_service - INFO - HaystackSearchService initialized for collection='default_collection' (persist=False, model='sentence-transformers/all-MiniLM-L6-v2'). Default path: './natural_pdf_index'\n"
272
+ ]
273
+ },
274
+ {
275
+ "name": "stderr",
276
+ "output_type": "stream",
277
+ "text": [
278
+ "natural_pdf.search - INFO - Created new HaystackSearchService instance for collection 'default_collection'.\n"
279
+ ]
280
+ },
281
+ {
282
+ "name": "stderr",
283
+ "output_type": "stream",
284
+ "text": [
285
+ "natural_pdf.search.searchable_mixin - INFO - index=True: Proceeding to index collection immediately after search initialization.\n"
286
+ ]
287
+ },
288
+ {
289
+ "name": "stderr",
290
+ "output_type": "stream",
291
+ "text": [
292
+ "natural_pdf.search.searchable_mixin - INFO - Starting internal indexing process into SearchService collection 'default_collection'...\n"
293
+ ]
294
+ },
295
+ {
296
+ "name": "stderr",
297
+ "output_type": "stream",
298
+ "text": [
299
+ "natural_pdf.search.searchable_mixin - INFO - Prepared 6 indexable items for indexing.\n"
300
+ ]
301
+ },
302
+ {
303
+ "name": "stderr",
304
+ "output_type": "stream",
305
+ "text": [
306
+ "natural_pdf.search.haystack_search_service - INFO - Index request for collection='default_collection', docs=6, model='sentence-transformers/all-MiniLM-L6-v2', force=False, persist=False\n"
307
+ ]
308
+ },
309
+ {
310
+ "name": "stderr",
311
+ "output_type": "stream",
312
+ "text": [
313
+ "natural_pdf.search.haystack_search_service - INFO - Created SentenceTransformersDocumentEmbedder. Model: sentence-transformers/all-MiniLM-L6-v2, Device: ComponentDevice(_single_device=Device(type=<DeviceType.MPS: 'mps'>, id=None), _multiple_devices=None)\n"
314
+ ]
315
+ },
316
+ {
317
+ "name": "stderr",
318
+ "output_type": "stream",
319
+ "text": [
320
+ "natural_pdf.search.haystack_search_service - INFO - Preparing Haystack Documents from 6 indexable items...\n"
321
+ ]
322
+ },
323
+ {
324
+ "name": "stderr",
325
+ "output_type": "stream",
326
+ "text": [
327
+ "natural_pdf.search.haystack_search_service - INFO - Embedding 6 documents using 'sentence-transformers/all-MiniLM-L6-v2'...\n"
328
+ ]
329
+ },
330
+ {
331
+ "data": {
332
+ "application/vnd.jupyter.widget-view+json": {
333
+ "model_id": "cf2741c35c4e430a80c721891fe023d9",
334
+ "version_major": 2,
335
+ "version_minor": 0
336
+ },
337
+ "text/plain": [
338
+ "Batches: 0%| | 0/1 [00:00<?, ?it/s]"
339
+ ]
340
+ },
341
+ "metadata": {},
342
+ "output_type": "display_data"
343
+ },
344
+ {
345
+ "name": "stderr",
346
+ "output_type": "stream",
347
+ "text": [
348
+ "natural_pdf.search.haystack_search_service - INFO - Successfully embedded 6 documents.\n"
349
+ ]
350
+ },
351
+ {
352
+ "name": "stderr",
353
+ "output_type": "stream",
354
+ "text": [
355
+ "natural_pdf.search.haystack_search_service - INFO - Writing 6 embedded documents to store 'default_collection'...\n"
356
+ ]
357
+ },
358
+ {
359
+ "name": "stderr",
360
+ "output_type": "stream",
361
+ "text": [
362
+ "natural_pdf.search.haystack_search_service - INFO - Successfully wrote 6 documents to store 'default_collection'.\n"
363
+ ]
364
+ },
365
+ {
366
+ "name": "stderr",
367
+ "output_type": "stream",
368
+ "text": [
369
+ "natural_pdf.search.haystack_search_service - INFO - Store 'default_collection' document count after write: 6\n"
370
+ ]
371
+ },
372
+ {
373
+ "name": "stderr",
374
+ "output_type": "stream",
375
+ "text": [
376
+ "natural_pdf.search.searchable_mixin - INFO - Successfully completed indexing into SearchService collection 'default_collection'.\n"
377
+ ]
378
+ },
379
+ {
380
+ "name": "stdout",
381
+ "output_type": "stream",
382
+ "text": [
383
+ "Search index initialized.\n"
384
+ ]
385
+ }
386
+ ],
387
+ "source": [
388
+ "# Initialize search. 'index=True' builds the index immediately.\n",
389
+ "# This might take some time depending on the number and size of PDFs.\n",
390
+ "collection.init_search(index=True) \n",
391
+ "print(\"Search index initialized.\")"
392
+ ]
393
+ },
394
+ {
395
+ "cell_type": "markdown",
396
+ "id": "e1e86b3b",
397
+ "metadata": {},
398
+ "source": [
399
+ "## Performing a Semantic Search\n",
400
+ "\n",
401
+ "Once the index is ready, you can use the `find_relevant()` method to search for content semantically related to your query."
402
+ ]
403
+ },
404
+ {
405
+ "cell_type": "code",
406
+ "execution_count": 4,
407
+ "id": "0d5308cd",
408
+ "metadata": {
409
+ "execution": {
410
+ "iopub.execute_input": "2025-04-21T21:26:36.017436Z",
411
+ "iopub.status.busy": "2025-04-21T21:26:36.017297Z",
412
+ "iopub.status.idle": "2025-04-21T21:26:36.167418Z",
413
+ "shell.execute_reply": "2025-04-21T21:26:36.167083Z"
414
+ }
415
+ },
416
+ "outputs": [
417
+ {
418
+ "name": "stderr",
419
+ "output_type": "stream",
420
+ "text": [
421
+ "natural_pdf.search.searchable_mixin - INFO - Searching collection 'default_collection' via HaystackSearchService...\n"
422
+ ]
423
+ },
424
+ {
425
+ "name": "stderr",
426
+ "output_type": "stream",
427
+ "text": [
428
+ "natural_pdf.search.haystack_search_service - INFO - Search request for collection='default_collection', query_type=str, options=TextSearchOptions(top_k=10, retriever_top_k=20, filters=None, use_reranker=True, reranker_instance=None, reranker_model=None, reranker_api_key=None)\n"
429
+ ]
430
+ },
431
+ {
432
+ "name": "stderr",
433
+ "output_type": "stream",
434
+ "text": [
435
+ "natural_pdf.search.haystack_search_service - INFO - Created SentenceTransformersTextEmbedder. Model: sentence-transformers/all-MiniLM-L6-v2, Device: ComponentDevice(_single_device=Device(type=<DeviceType.MPS: 'mps'>, id=None), _multiple_devices=None)\n"
436
+ ]
437
+ },
438
+ {
439
+ "data": {
440
+ "application/vnd.jupyter.widget-view+json": {
441
+ "model_id": "4fe0753b29774d8daf7ecb3f89719349",
442
+ "version_major": 2,
443
+ "version_minor": 0
444
+ },
445
+ "text/plain": [
446
+ "Batches: 0%| | 0/1 [00:00<?, ?it/s]"
447
+ ]
448
+ },
449
+ "metadata": {},
450
+ "output_type": "display_data"
451
+ },
452
+ {
453
+ "name": "stderr",
454
+ "output_type": "stream",
455
+ "text": [
456
+ "natural_pdf.search.haystack_search_service - INFO - Running retrieval pipeline for collection 'default_collection'...\n"
457
+ ]
458
+ },
459
+ {
460
+ "name": "stderr",
461
+ "output_type": "stream",
462
+ "text": [
463
+ "natural_pdf.search.haystack_search_service - INFO - Retrieved 6 documents.\n"
464
+ ]
465
+ },
466
+ {
467
+ "name": "stderr",
468
+ "output_type": "stream",
469
+ "text": [
470
+ "natural_pdf.search.searchable_mixin - INFO - SearchService returned 6 results from collection 'default_collection'.\n"
471
+ ]
472
+ },
473
+ {
474
+ "name": "stdout",
475
+ "output_type": "stream",
476
+ "text": [
477
+ "Found 6 results for 'american president':\n"
478
+ ]
479
+ }
480
+ ],
481
+ "source": [
482
+ "# Perform a search query\n",
483
+ "query = \"american president\"\n",
484
+ "results = collection.find_relevant(query)\n",
485
+ "\n",
486
+ "print(f\"Found {len(results)} results for '{query}':\")"
487
+ ]
488
+ },
489
+ {
490
+ "cell_type": "markdown",
491
+ "id": "fe6158f2",
492
+ "metadata": {},
493
+ "source": [
494
+ "## Understanding Search Results\n",
495
+ "\n",
496
+ "The `find_relevant()` method returns a list of dictionaries, each representing a relevant text chunk found in one of the PDFs. Each result includes:\n",
497
+ "\n",
498
+ "* `pdf_path`: The path to the PDF document where the result was found.\n",
499
+ "* `page_number`: The page number within the PDF.\n",
500
+ "* `score`: A relevance score (higher means more relevant).\n",
501
+ "* `content_snippet`: A snippet of the text chunk that matched the query."
502
+ ]
503
+ },
504
+ {
505
+ "cell_type": "code",
506
+ "execution_count": 5,
507
+ "id": "5ed7a221",
508
+ "metadata": {
509
+ "execution": {
510
+ "iopub.execute_input": "2025-04-21T21:26:36.168858Z",
511
+ "iopub.status.busy": "2025-04-21T21:26:36.168719Z",
512
+ "iopub.status.idle": "2025-04-21T21:26:36.171586Z",
513
+ "shell.execute_reply": "2025-04-21T21:26:36.171300Z"
514
+ }
515
+ },
516
+ "outputs": [
517
+ {
518
+ "name": "stdout",
519
+ "output_type": "stream",
520
+ "text": [
521
+ " 1. PDF: /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp67aw1giy.pdf\n",
522
+ " Page: 2 (Score: 0.0708)\n",
523
+ " Snippet: \n",
524
+ " \n",
525
+ " Library Weeding Log ...\n",
526
+ " 2. PDF: /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp67aw1giy.pdf\n",
527
+ " Page: 5 (Score: 0.0669)\n",
528
+ " Snippet: \n",
529
+ " \n",
530
+ " Library Weeding Log ...\n",
531
+ " 3. PDF: /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp0qxzzh1m.pdf\n",
532
+ " Page: 1 (Score: -0.0040)\n",
533
+ " Snippet: \n",
534
+ " \n",
535
+ " ...\n",
536
+ " 4. PDF: /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp67aw1giy.pdf\n",
537
+ " Page: 4 (Score: -0.0245)\n",
538
+ " Snippet: \n",
539
+ " \n",
540
+ " Library Weeding Log ...\n",
541
+ " 5. PDF: /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp67aw1giy.pdf\n",
542
+ " Page: 3 (Score: -0.0445)\n",
543
+ " Snippet: \n",
544
+ " \n",
545
+ " Library Weeding Log ...\n",
546
+ " 6. PDF: /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp67aw1giy.pdf\n",
547
+ " Page: 1 (Score: -0.0473)\n",
548
+ " Snippet: \n",
549
+ " \n",
550
+ " Library Weeding Log ...\n"
551
+ ]
552
+ }
553
+ ],
554
+ "source": [
555
+ "# Process and display the results\n",
556
+ "if results:\n",
557
+ " for i, result in enumerate(results):\n",
558
+ " print(f\" {i+1}. PDF: {result['pdf_path']}\")\n",
559
+ " print(f\" Page: {result['page_number']} (Score: {result['score']:.4f})\")\n",
560
+ " # Display a snippet of the content\n",
561
+ " snippet = result.get('content_snippet', '')\n",
562
+ " print(f\" Snippet: {snippet}...\") \n",
563
+ "else:\n",
564
+ " print(\" No relevant results found.\")\n",
565
+ "\n",
566
+ "# You can access the full content if needed via the result object, \n",
567
+ "# though 'content_snippet' is usually sufficient for display."
568
+ ]
569
+ },
570
+ {
571
+ "cell_type": "markdown",
572
+ "id": "1c628c5f",
573
+ "metadata": {},
574
+ "source": [
575
+ "Semantic search allows you to efficiently query large sets of documents to find the most relevant information without needing exact keyword matches, leveraging the meaning and context of your query. "
576
+ ]
577
+ }
578
+ ],
579
+ "metadata": {
580
+ "jupytext": {
581
+ "cell_metadata_filter": "-all",
582
+ "main_language": "python",
583
+ "notebook_metadata_filter": "-all"
584
+ },
585
+ "language_info": {
586
+ "codemirror_mode": {
587
+ "name": "ipython",
588
+ "version": 3
589
+ },
590
+ "file_extension": ".py",
591
+ "mimetype": "text/x-python",
592
+ "name": "python",
593
+ "nbconvert_exporter": "python",
594
+ "pygments_lexer": "ipython3",
595
+ "version": "3.10.13"
596
+ },
597
+ "widgets": {
598
+ "application/vnd.jupyter.widget-state+json": {
599
+ "state": {
600
+ "026c1486481a4a5cb64c3e3e4dfea542": {
601
+ "model_module": "@jupyter-widgets/controls",
602
+ "model_module_version": "2.0.0",
603
+ "model_name": "HTMLStyleModel",
604
+ "state": {
605
+ "_model_module": "@jupyter-widgets/controls",
606
+ "_model_module_version": "2.0.0",
607
+ "_model_name": "HTMLStyleModel",
608
+ "_view_count": null,
609
+ "_view_module": "@jupyter-widgets/base",
610
+ "_view_module_version": "2.0.0",
611
+ "_view_name": "StyleView",
612
+ "background": null,
613
+ "description_width": "",
614
+ "font_size": null,
615
+ "text_color": null
616
+ }
617
+ },
618
+ "0cecc462036e46ceabe7e0bbf41f086c": {
619
+ "model_module": "@jupyter-widgets/base",
620
+ "model_module_version": "2.0.0",
621
+ "model_name": "LayoutModel",
622
+ "state": {
623
+ "_model_module": "@jupyter-widgets/base",
624
+ "_model_module_version": "2.0.0",
625
+ "_model_name": "LayoutModel",
626
+ "_view_count": null,
627
+ "_view_module": "@jupyter-widgets/base",
628
+ "_view_module_version": "2.0.0",
629
+ "_view_name": "LayoutView",
630
+ "align_content": null,
631
+ "align_items": null,
632
+ "align_self": null,
633
+ "border_bottom": null,
634
+ "border_left": null,
635
+ "border_right": null,
636
+ "border_top": null,
637
+ "bottom": null,
638
+ "display": null,
639
+ "flex": null,
640
+ "flex_flow": null,
641
+ "grid_area": null,
642
+ "grid_auto_columns": null,
643
+ "grid_auto_flow": null,
644
+ "grid_auto_rows": null,
645
+ "grid_column": null,
646
+ "grid_gap": null,
647
+ "grid_row": null,
648
+ "grid_template_areas": null,
649
+ "grid_template_columns": null,
650
+ "grid_template_rows": null,
651
+ "height": null,
652
+ "justify_content": null,
653
+ "justify_items": null,
654
+ "left": null,
655
+ "margin": null,
656
+ "max_height": null,
657
+ "max_width": null,
658
+ "min_height": null,
659
+ "min_width": null,
660
+ "object_fit": null,
661
+ "object_position": null,
662
+ "order": null,
663
+ "overflow": null,
664
+ "padding": null,
665
+ "right": null,
666
+ "top": null,
667
+ "visibility": null,
668
+ "width": null
669
+ }
670
+ },
671
+ "1c916b704c77462ebf088c5b45543e05": {
672
+ "model_module": "@jupyter-widgets/controls",
673
+ "model_module_version": "2.0.0",
674
+ "model_name": "HTMLModel",
675
+ "state": {
676
+ "_dom_classes": [],
677
+ "_model_module": "@jupyter-widgets/controls",
678
+ "_model_module_version": "2.0.0",
679
+ "_model_name": "HTMLModel",
680
+ "_view_count": null,
681
+ "_view_module": "@jupyter-widgets/controls",
682
+ "_view_module_version": "2.0.0",
683
+ "_view_name": "HTMLView",
684
+ "description": "",
685
+ "description_allow_html": false,
686
+ "layout": "IPY_MODEL_bbd4b71814b24819bc02e8a3ae7cf936",
687
+ "placeholder": "​",
688
+ "style": "IPY_MODEL_026c1486481a4a5cb64c3e3e4dfea542",
689
+ "tabbable": null,
690
+ "tooltip": null,
691
+ "value": "Batches: 100%"
692
+ }
693
+ },
694
+ "23da5426a33a4f2e8eba26ae5371beb0": {
695
+ "model_module": "@jupyter-widgets/controls",
696
+ "model_module_version": "2.0.0",
697
+ "model_name": "FloatProgressModel",
698
+ "state": {
699
+ "_dom_classes": [],
700
+ "_model_module": "@jupyter-widgets/controls",
701
+ "_model_module_version": "2.0.0",
702
+ "_model_name": "FloatProgressModel",
703
+ "_view_count": null,
704
+ "_view_module": "@jupyter-widgets/controls",
705
+ "_view_module_version": "2.0.0",
706
+ "_view_name": "ProgressView",
707
+ "bar_style": "success",
708
+ "description": "",
709
+ "description_allow_html": false,
710
+ "layout": "IPY_MODEL_b9c7a538fab94ea08463c82c24344a64",
711
+ "max": 1.0,
712
+ "min": 0.0,
713
+ "orientation": "horizontal",
714
+ "style": "IPY_MODEL_f58162f746b140a8bc38b978a971b841",
715
+ "tabbable": null,
716
+ "tooltip": null,
717
+ "value": 1.0
718
+ }
719
+ },
720
+ "327031d1fca04e19ba9fb925ea1f07af": {
721
+ "model_module": "@jupyter-widgets/controls",
722
+ "model_module_version": "2.0.0",
723
+ "model_name": "HTMLModel",
724
+ "state": {
725
+ "_dom_classes": [],
726
+ "_model_module": "@jupyter-widgets/controls",
727
+ "_model_module_version": "2.0.0",
728
+ "_model_name": "HTMLModel",
729
+ "_view_count": null,
730
+ "_view_module": "@jupyter-widgets/controls",
731
+ "_view_module_version": "2.0.0",
732
+ "_view_name": "HTMLView",
733
+ "description": "",
734
+ "description_allow_html": false,
735
+ "layout": "IPY_MODEL_dca88061cb6f43748dcdc1132ecccc90",
736
+ "placeholder": "​",
737
+ "style": "IPY_MODEL_675d17109fee467495b1e03599ac9261",
738
+ "tabbable": null,
739
+ "tooltip": null,
740
+ "value": " 1/1 [00:00&lt;00:00,  7.20it/s]"
741
+ }
742
+ },
743
+ "3ccf196c0d0d4dd089991bd63d435c4d": {
744
+ "model_module": "@jupyter-widgets/base",
745
+ "model_module_version": "2.0.0",
746
+ "model_name": "LayoutModel",
747
+ "state": {
748
+ "_model_module": "@jupyter-widgets/base",
749
+ "_model_module_version": "2.0.0",
750
+ "_model_name": "LayoutModel",
751
+ "_view_count": null,
752
+ "_view_module": "@jupyter-widgets/base",
753
+ "_view_module_version": "2.0.0",
754
+ "_view_name": "LayoutView",
755
+ "align_content": null,
756
+ "align_items": null,
757
+ "align_self": null,
758
+ "border_bottom": null,
759
+ "border_left": null,
760
+ "border_right": null,
761
+ "border_top": null,
762
+ "bottom": null,
763
+ "display": null,
764
+ "flex": null,
765
+ "flex_flow": null,
766
+ "grid_area": null,
767
+ "grid_auto_columns": null,
768
+ "grid_auto_flow": null,
769
+ "grid_auto_rows": null,
770
+ "grid_column": null,
771
+ "grid_gap": null,
772
+ "grid_row": null,
773
+ "grid_template_areas": null,
774
+ "grid_template_columns": null,
775
+ "grid_template_rows": null,
776
+ "height": null,
777
+ "justify_content": null,
778
+ "justify_items": null,
779
+ "left": null,
780
+ "margin": null,
781
+ "max_height": null,
782
+ "max_width": null,
783
+ "min_height": null,
784
+ "min_width": null,
785
+ "object_fit": null,
786
+ "object_position": null,
787
+ "order": null,
788
+ "overflow": null,
789
+ "padding": null,
790
+ "right": null,
791
+ "top": null,
792
+ "visibility": null,
793
+ "width": null
794
+ }
795
+ },
796
+ "4fe0753b29774d8daf7ecb3f89719349": {
797
+ "model_module": "@jupyter-widgets/controls",
798
+ "model_module_version": "2.0.0",
799
+ "model_name": "HBoxModel",
800
+ "state": {
801
+ "_dom_classes": [],
802
+ "_model_module": "@jupyter-widgets/controls",
803
+ "_model_module_version": "2.0.0",
804
+ "_model_name": "HBoxModel",
805
+ "_view_count": null,
806
+ "_view_module": "@jupyter-widgets/controls",
807
+ "_view_module_version": "2.0.0",
808
+ "_view_name": "HBoxView",
809
+ "box_style": "",
810
+ "children": [
811
+ "IPY_MODEL_8aae53bc7d2d495ca6eb05f98d6fc8da",
812
+ "IPY_MODEL_23da5426a33a4f2e8eba26ae5371beb0",
813
+ "IPY_MODEL_327031d1fca04e19ba9fb925ea1f07af"
814
+ ],
815
+ "layout": "IPY_MODEL_f991f9535ca04bc2bee7f1964f1132db",
816
+ "tabbable": null,
817
+ "tooltip": null
818
+ }
819
+ },
820
+ "548fdb05df104726991dc64580769247": {
821
+ "model_module": "@jupyter-widgets/controls",
822
+ "model_module_version": "2.0.0",
823
+ "model_name": "HTMLStyleModel",
824
+ "state": {
825
+ "_model_module": "@jupyter-widgets/controls",
826
+ "_model_module_version": "2.0.0",
827
+ "_model_name": "HTMLStyleModel",
828
+ "_view_count": null,
829
+ "_view_module": "@jupyter-widgets/base",
830
+ "_view_module_version": "2.0.0",
831
+ "_view_name": "StyleView",
832
+ "background": null,
833
+ "description_width": "",
834
+ "font_size": null,
835
+ "text_color": null
836
+ }
837
+ },
838
+ "675d17109fee467495b1e03599ac9261": {
839
+ "model_module": "@jupyter-widgets/controls",
840
+ "model_module_version": "2.0.0",
841
+ "model_name": "HTMLStyleModel",
842
+ "state": {
843
+ "_model_module": "@jupyter-widgets/controls",
844
+ "_model_module_version": "2.0.0",
845
+ "_model_name": "HTMLStyleModel",
846
+ "_view_count": null,
847
+ "_view_module": "@jupyter-widgets/base",
848
+ "_view_module_version": "2.0.0",
849
+ "_view_name": "StyleView",
850
+ "background": null,
851
+ "description_width": "",
852
+ "font_size": null,
853
+ "text_color": null
854
+ }
855
+ },
856
+ "7ce5089ebf82424abba28acc0ce522da": {
857
+ "model_module": "@jupyter-widgets/controls",
858
+ "model_module_version": "2.0.0",
859
+ "model_name": "ProgressStyleModel",
860
+ "state": {
861
+ "_model_module": "@jupyter-widgets/controls",
862
+ "_model_module_version": "2.0.0",
863
+ "_model_name": "ProgressStyleModel",
864
+ "_view_count": null,
865
+ "_view_module": "@jupyter-widgets/base",
866
+ "_view_module_version": "2.0.0",
867
+ "_view_name": "StyleView",
868
+ "bar_color": null,
869
+ "description_width": ""
870
+ }
871
+ },
872
+ "8328a5952f5645f3b31ab5351fb16a9a": {
873
+ "model_module": "@jupyter-widgets/controls",
874
+ "model_module_version": "2.0.0",
875
+ "model_name": "FloatProgressModel",
876
+ "state": {
877
+ "_dom_classes": [],
878
+ "_model_module": "@jupyter-widgets/controls",
879
+ "_model_module_version": "2.0.0",
880
+ "_model_name": "FloatProgressModel",
881
+ "_view_count": null,
882
+ "_view_module": "@jupyter-widgets/controls",
883
+ "_view_module_version": "2.0.0",
884
+ "_view_name": "ProgressView",
885
+ "bar_style": "success",
886
+ "description": "",
887
+ "description_allow_html": false,
888
+ "layout": "IPY_MODEL_96691282e844404ca0686f8777371ba9",
889
+ "max": 1.0,
890
+ "min": 0.0,
891
+ "orientation": "horizontal",
892
+ "style": "IPY_MODEL_7ce5089ebf82424abba28acc0ce522da",
893
+ "tabbable": null,
894
+ "tooltip": null,
895
+ "value": 1.0
896
+ }
897
+ },
898
+ "8aae53bc7d2d495ca6eb05f98d6fc8da": {
899
+ "model_module": "@jupyter-widgets/controls",
900
+ "model_module_version": "2.0.0",
901
+ "model_name": "HTMLModel",
902
+ "state": {
903
+ "_dom_classes": [],
904
+ "_model_module": "@jupyter-widgets/controls",
905
+ "_model_module_version": "2.0.0",
906
+ "_model_name": "HTMLModel",
907
+ "_view_count": null,
908
+ "_view_module": "@jupyter-widgets/controls",
909
+ "_view_module_version": "2.0.0",
910
+ "_view_name": "HTMLView",
911
+ "description": "",
912
+ "description_allow_html": false,
913
+ "layout": "IPY_MODEL_8e70848dd5404708b60063a404310668",
914
+ "placeholder": "​",
915
+ "style": "IPY_MODEL_b9f037775be645de95f0c49ce550385a",
916
+ "tabbable": null,
917
+ "tooltip": null,
918
+ "value": "Batches: 100%"
919
+ }
920
+ },
921
+ "8e70848dd5404708b60063a404310668": {
922
+ "model_module": "@jupyter-widgets/base",
923
+ "model_module_version": "2.0.0",
924
+ "model_name": "LayoutModel",
925
+ "state": {
926
+ "_model_module": "@jupyter-widgets/base",
927
+ "_model_module_version": "2.0.0",
928
+ "_model_name": "LayoutModel",
929
+ "_view_count": null,
930
+ "_view_module": "@jupyter-widgets/base",
931
+ "_view_module_version": "2.0.0",
932
+ "_view_name": "LayoutView",
933
+ "align_content": null,
934
+ "align_items": null,
935
+ "align_self": null,
936
+ "border_bottom": null,
937
+ "border_left": null,
938
+ "border_right": null,
939
+ "border_top": null,
940
+ "bottom": null,
941
+ "display": null,
942
+ "flex": null,
943
+ "flex_flow": null,
944
+ "grid_area": null,
945
+ "grid_auto_columns": null,
946
+ "grid_auto_flow": null,
947
+ "grid_auto_rows": null,
948
+ "grid_column": null,
949
+ "grid_gap": null,
950
+ "grid_row": null,
951
+ "grid_template_areas": null,
952
+ "grid_template_columns": null,
953
+ "grid_template_rows": null,
954
+ "height": null,
955
+ "justify_content": null,
956
+ "justify_items": null,
957
+ "left": null,
958
+ "margin": null,
959
+ "max_height": null,
960
+ "max_width": null,
961
+ "min_height": null,
962
+ "min_width": null,
963
+ "object_fit": null,
964
+ "object_position": null,
965
+ "order": null,
966
+ "overflow": null,
967
+ "padding": null,
968
+ "right": null,
969
+ "top": null,
970
+ "visibility": null,
971
+ "width": null
972
+ }
973
+ },
974
+ "96691282e844404ca0686f8777371ba9": {
975
+ "model_module": "@jupyter-widgets/base",
976
+ "model_module_version": "2.0.0",
977
+ "model_name": "LayoutModel",
978
+ "state": {
979
+ "_model_module": "@jupyter-widgets/base",
980
+ "_model_module_version": "2.0.0",
981
+ "_model_name": "LayoutModel",
982
+ "_view_count": null,
983
+ "_view_module": "@jupyter-widgets/base",
984
+ "_view_module_version": "2.0.0",
985
+ "_view_name": "LayoutView",
986
+ "align_content": null,
987
+ "align_items": null,
988
+ "align_self": null,
989
+ "border_bottom": null,
990
+ "border_left": null,
991
+ "border_right": null,
992
+ "border_top": null,
993
+ "bottom": null,
994
+ "display": null,
995
+ "flex": null,
996
+ "flex_flow": null,
997
+ "grid_area": null,
998
+ "grid_auto_columns": null,
999
+ "grid_auto_flow": null,
1000
+ "grid_auto_rows": null,
1001
+ "grid_column": null,
1002
+ "grid_gap": null,
1003
+ "grid_row": null,
1004
+ "grid_template_areas": null,
1005
+ "grid_template_columns": null,
1006
+ "grid_template_rows": null,
1007
+ "height": null,
1008
+ "justify_content": null,
1009
+ "justify_items": null,
1010
+ "left": null,
1011
+ "margin": null,
1012
+ "max_height": null,
1013
+ "max_width": null,
1014
+ "min_height": null,
1015
+ "min_width": null,
1016
+ "object_fit": null,
1017
+ "object_position": null,
1018
+ "order": null,
1019
+ "overflow": null,
1020
+ "padding": null,
1021
+ "right": null,
1022
+ "top": null,
1023
+ "visibility": null,
1024
+ "width": null
1025
+ }
1026
+ },
1027
+ "b9c7a538fab94ea08463c82c24344a64": {
1028
+ "model_module": "@jupyter-widgets/base",
1029
+ "model_module_version": "2.0.0",
1030
+ "model_name": "LayoutModel",
1031
+ "state": {
1032
+ "_model_module": "@jupyter-widgets/base",
1033
+ "_model_module_version": "2.0.0",
1034
+ "_model_name": "LayoutModel",
1035
+ "_view_count": null,
1036
+ "_view_module": "@jupyter-widgets/base",
1037
+ "_view_module_version": "2.0.0",
1038
+ "_view_name": "LayoutView",
1039
+ "align_content": null,
1040
+ "align_items": null,
1041
+ "align_self": null,
1042
+ "border_bottom": null,
1043
+ "border_left": null,
1044
+ "border_right": null,
1045
+ "border_top": null,
1046
+ "bottom": null,
1047
+ "display": null,
1048
+ "flex": null,
1049
+ "flex_flow": null,
1050
+ "grid_area": null,
1051
+ "grid_auto_columns": null,
1052
+ "grid_auto_flow": null,
1053
+ "grid_auto_rows": null,
1054
+ "grid_column": null,
1055
+ "grid_gap": null,
1056
+ "grid_row": null,
1057
+ "grid_template_areas": null,
1058
+ "grid_template_columns": null,
1059
+ "grid_template_rows": null,
1060
+ "height": null,
1061
+ "justify_content": null,
1062
+ "justify_items": null,
1063
+ "left": null,
1064
+ "margin": null,
1065
+ "max_height": null,
1066
+ "max_width": null,
1067
+ "min_height": null,
1068
+ "min_width": null,
1069
+ "object_fit": null,
1070
+ "object_position": null,
1071
+ "order": null,
1072
+ "overflow": null,
1073
+ "padding": null,
1074
+ "right": null,
1075
+ "top": null,
1076
+ "visibility": null,
1077
+ "width": null
1078
+ }
1079
+ },
1080
+ "b9f037775be645de95f0c49ce550385a": {
1081
+ "model_module": "@jupyter-widgets/controls",
1082
+ "model_module_version": "2.0.0",
1083
+ "model_name": "HTMLStyleModel",
1084
+ "state": {
1085
+ "_model_module": "@jupyter-widgets/controls",
1086
+ "_model_module_version": "2.0.0",
1087
+ "_model_name": "HTMLStyleModel",
1088
+ "_view_count": null,
1089
+ "_view_module": "@jupyter-widgets/base",
1090
+ "_view_module_version": "2.0.0",
1091
+ "_view_name": "StyleView",
1092
+ "background": null,
1093
+ "description_width": "",
1094
+ "font_size": null,
1095
+ "text_color": null
1096
+ }
1097
+ },
1098
+ "bbd4b71814b24819bc02e8a3ae7cf936": {
1099
+ "model_module": "@jupyter-widgets/base",
1100
+ "model_module_version": "2.0.0",
1101
+ "model_name": "LayoutModel",
1102
+ "state": {
1103
+ "_model_module": "@jupyter-widgets/base",
1104
+ "_model_module_version": "2.0.0",
1105
+ "_model_name": "LayoutModel",
1106
+ "_view_count": null,
1107
+ "_view_module": "@jupyter-widgets/base",
1108
+ "_view_module_version": "2.0.0",
1109
+ "_view_name": "LayoutView",
1110
+ "align_content": null,
1111
+ "align_items": null,
1112
+ "align_self": null,
1113
+ "border_bottom": null,
1114
+ "border_left": null,
1115
+ "border_right": null,
1116
+ "border_top": null,
1117
+ "bottom": null,
1118
+ "display": null,
1119
+ "flex": null,
1120
+ "flex_flow": null,
1121
+ "grid_area": null,
1122
+ "grid_auto_columns": null,
1123
+ "grid_auto_flow": null,
1124
+ "grid_auto_rows": null,
1125
+ "grid_column": null,
1126
+ "grid_gap": null,
1127
+ "grid_row": null,
1128
+ "grid_template_areas": null,
1129
+ "grid_template_columns": null,
1130
+ "grid_template_rows": null,
1131
+ "height": null,
1132
+ "justify_content": null,
1133
+ "justify_items": null,
1134
+ "left": null,
1135
+ "margin": null,
1136
+ "max_height": null,
1137
+ "max_width": null,
1138
+ "min_height": null,
1139
+ "min_width": null,
1140
+ "object_fit": null,
1141
+ "object_position": null,
1142
+ "order": null,
1143
+ "overflow": null,
1144
+ "padding": null,
1145
+ "right": null,
1146
+ "top": null,
1147
+ "visibility": null,
1148
+ "width": null
1149
+ }
1150
+ },
1151
+ "cf2741c35c4e430a80c721891fe023d9": {
1152
+ "model_module": "@jupyter-widgets/controls",
1153
+ "model_module_version": "2.0.0",
1154
+ "model_name": "HBoxModel",
1155
+ "state": {
1156
+ "_dom_classes": [],
1157
+ "_model_module": "@jupyter-widgets/controls",
1158
+ "_model_module_version": "2.0.0",
1159
+ "_model_name": "HBoxModel",
1160
+ "_view_count": null,
1161
+ "_view_module": "@jupyter-widgets/controls",
1162
+ "_view_module_version": "2.0.0",
1163
+ "_view_name": "HBoxView",
1164
+ "box_style": "",
1165
+ "children": [
1166
+ "IPY_MODEL_1c916b704c77462ebf088c5b45543e05",
1167
+ "IPY_MODEL_8328a5952f5645f3b31ab5351fb16a9a",
1168
+ "IPY_MODEL_e567efbe84ed461d85b57fc9d69d9d9c"
1169
+ ],
1170
+ "layout": "IPY_MODEL_0cecc462036e46ceabe7e0bbf41f086c",
1171
+ "tabbable": null,
1172
+ "tooltip": null
1173
+ }
1174
+ },
1175
+ "dca88061cb6f43748dcdc1132ecccc90": {
1176
+ "model_module": "@jupyter-widgets/base",
1177
+ "model_module_version": "2.0.0",
1178
+ "model_name": "LayoutModel",
1179
+ "state": {
1180
+ "_model_module": "@jupyter-widgets/base",
1181
+ "_model_module_version": "2.0.0",
1182
+ "_model_name": "LayoutModel",
1183
+ "_view_count": null,
1184
+ "_view_module": "@jupyter-widgets/base",
1185
+ "_view_module_version": "2.0.0",
1186
+ "_view_name": "LayoutView",
1187
+ "align_content": null,
1188
+ "align_items": null,
1189
+ "align_self": null,
1190
+ "border_bottom": null,
1191
+ "border_left": null,
1192
+ "border_right": null,
1193
+ "border_top": null,
1194
+ "bottom": null,
1195
+ "display": null,
1196
+ "flex": null,
1197
+ "flex_flow": null,
1198
+ "grid_area": null,
1199
+ "grid_auto_columns": null,
1200
+ "grid_auto_flow": null,
1201
+ "grid_auto_rows": null,
1202
+ "grid_column": null,
1203
+ "grid_gap": null,
1204
+ "grid_row": null,
1205
+ "grid_template_areas": null,
1206
+ "grid_template_columns": null,
1207
+ "grid_template_rows": null,
1208
+ "height": null,
1209
+ "justify_content": null,
1210
+ "justify_items": null,
1211
+ "left": null,
1212
+ "margin": null,
1213
+ "max_height": null,
1214
+ "max_width": null,
1215
+ "min_height": null,
1216
+ "min_width": null,
1217
+ "object_fit": null,
1218
+ "object_position": null,
1219
+ "order": null,
1220
+ "overflow": null,
1221
+ "padding": null,
1222
+ "right": null,
1223
+ "top": null,
1224
+ "visibility": null,
1225
+ "width": null
1226
+ }
1227
+ },
1228
+ "e567efbe84ed461d85b57fc9d69d9d9c": {
1229
+ "model_module": "@jupyter-widgets/controls",
1230
+ "model_module_version": "2.0.0",
1231
+ "model_name": "HTMLModel",
1232
+ "state": {
1233
+ "_dom_classes": [],
1234
+ "_model_module": "@jupyter-widgets/controls",
1235
+ "_model_module_version": "2.0.0",
1236
+ "_model_name": "HTMLModel",
1237
+ "_view_count": null,
1238
+ "_view_module": "@jupyter-widgets/controls",
1239
+ "_view_module_version": "2.0.0",
1240
+ "_view_name": "HTMLView",
1241
+ "description": "",
1242
+ "description_allow_html": false,
1243
+ "layout": "IPY_MODEL_3ccf196c0d0d4dd089991bd63d435c4d",
1244
+ "placeholder": "​",
1245
+ "style": "IPY_MODEL_548fdb05df104726991dc64580769247",
1246
+ "tabbable": null,
1247
+ "tooltip": null,
1248
+ "value": " 1/1 [00:00&lt;00:00,  4.09it/s]"
1249
+ }
1250
+ },
1251
+ "f58162f746b140a8bc38b978a971b841": {
1252
+ "model_module": "@jupyter-widgets/controls",
1253
+ "model_module_version": "2.0.0",
1254
+ "model_name": "ProgressStyleModel",
1255
+ "state": {
1256
+ "_model_module": "@jupyter-widgets/controls",
1257
+ "_model_module_version": "2.0.0",
1258
+ "_model_name": "ProgressStyleModel",
1259
+ "_view_count": null,
1260
+ "_view_module": "@jupyter-widgets/base",
1261
+ "_view_module_version": "2.0.0",
1262
+ "_view_name": "StyleView",
1263
+ "bar_color": null,
1264
+ "description_width": ""
1265
+ }
1266
+ },
1267
+ "f991f9535ca04bc2bee7f1964f1132db": {
1268
+ "model_module": "@jupyter-widgets/base",
1269
+ "model_module_version": "2.0.0",
1270
+ "model_name": "LayoutModel",
1271
+ "state": {
1272
+ "_model_module": "@jupyter-widgets/base",
1273
+ "_model_module_version": "2.0.0",
1274
+ "_model_name": "LayoutModel",
1275
+ "_view_count": null,
1276
+ "_view_module": "@jupyter-widgets/base",
1277
+ "_view_module_version": "2.0.0",
1278
+ "_view_name": "LayoutView",
1279
+ "align_content": null,
1280
+ "align_items": null,
1281
+ "align_self": null,
1282
+ "border_bottom": null,
1283
+ "border_left": null,
1284
+ "border_right": null,
1285
+ "border_top": null,
1286
+ "bottom": null,
1287
+ "display": null,
1288
+ "flex": null,
1289
+ "flex_flow": null,
1290
+ "grid_area": null,
1291
+ "grid_auto_columns": null,
1292
+ "grid_auto_flow": null,
1293
+ "grid_auto_rows": null,
1294
+ "grid_column": null,
1295
+ "grid_gap": null,
1296
+ "grid_row": null,
1297
+ "grid_template_areas": null,
1298
+ "grid_template_columns": null,
1299
+ "grid_template_rows": null,
1300
+ "height": null,
1301
+ "justify_content": null,
1302
+ "justify_items": null,
1303
+ "left": null,
1304
+ "margin": null,
1305
+ "max_height": null,
1306
+ "max_width": null,
1307
+ "min_height": null,
1308
+ "min_width": null,
1309
+ "object_fit": null,
1310
+ "object_position": null,
1311
+ "order": null,
1312
+ "overflow": null,
1313
+ "padding": null,
1314
+ "right": null,
1315
+ "top": null,
1316
+ "visibility": null,
1317
+ "width": null
1318
+ }
1319
+ }
1320
+ },
1321
+ "version_major": 2,
1322
+ "version_minor": 0
1323
+ }
1324
+ }
1325
+ },
1326
+ "nbformat": 4,
1327
+ "nbformat_minor": 5
1328
+ }