natural-pdf 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. natural_pdf/__init__.py +3 -0
  2. natural_pdf/analyzers/layout/base.py +1 -5
  3. natural_pdf/analyzers/layout/gemini.py +61 -51
  4. natural_pdf/analyzers/layout/layout_analyzer.py +40 -11
  5. natural_pdf/analyzers/layout/layout_manager.py +26 -84
  6. natural_pdf/analyzers/layout/layout_options.py +7 -0
  7. natural_pdf/analyzers/layout/pdfplumber_table_finder.py +142 -0
  8. natural_pdf/analyzers/layout/surya.py +46 -123
  9. natural_pdf/analyzers/layout/tatr.py +51 -4
  10. natural_pdf/analyzers/text_structure.py +3 -5
  11. natural_pdf/analyzers/utils.py +3 -3
  12. natural_pdf/classification/manager.py +422 -0
  13. natural_pdf/classification/mixin.py +163 -0
  14. natural_pdf/classification/results.py +80 -0
  15. natural_pdf/collections/mixins.py +111 -0
  16. natural_pdf/collections/pdf_collection.py +434 -15
  17. natural_pdf/core/element_manager.py +83 -0
  18. natural_pdf/core/highlighting_service.py +13 -22
  19. natural_pdf/core/page.py +578 -93
  20. natural_pdf/core/pdf.py +912 -460
  21. natural_pdf/elements/base.py +134 -40
  22. natural_pdf/elements/collections.py +712 -109
  23. natural_pdf/elements/region.py +722 -69
  24. natural_pdf/elements/text.py +4 -1
  25. natural_pdf/export/mixin.py +137 -0
  26. natural_pdf/exporters/base.py +3 -3
  27. natural_pdf/exporters/paddleocr.py +5 -4
  28. natural_pdf/extraction/manager.py +135 -0
  29. natural_pdf/extraction/mixin.py +279 -0
  30. natural_pdf/extraction/result.py +23 -0
  31. natural_pdf/ocr/__init__.py +5 -5
  32. natural_pdf/ocr/engine_doctr.py +346 -0
  33. natural_pdf/ocr/engine_easyocr.py +6 -3
  34. natural_pdf/ocr/ocr_factory.py +24 -4
  35. natural_pdf/ocr/ocr_manager.py +122 -26
  36. natural_pdf/ocr/ocr_options.py +94 -11
  37. natural_pdf/ocr/utils.py +19 -6
  38. natural_pdf/qa/document_qa.py +0 -4
  39. natural_pdf/search/__init__.py +20 -34
  40. natural_pdf/search/haystack_search_service.py +309 -265
  41. natural_pdf/search/haystack_utils.py +99 -75
  42. natural_pdf/search/search_service_protocol.py +11 -12
  43. natural_pdf/selectors/parser.py +431 -230
  44. natural_pdf/utils/debug.py +3 -3
  45. natural_pdf/utils/identifiers.py +1 -1
  46. natural_pdf/utils/locks.py +8 -0
  47. natural_pdf/utils/packaging.py +8 -6
  48. natural_pdf/utils/text_extraction.py +60 -1
  49. natural_pdf/utils/tqdm_utils.py +51 -0
  50. natural_pdf/utils/visualization.py +18 -0
  51. natural_pdf/widgets/viewer.py +4 -25
  52. {natural_pdf-0.1.7.dist-info → natural_pdf-0.1.9.dist-info}/METADATA +17 -3
  53. natural_pdf-0.1.9.dist-info/RECORD +80 -0
  54. {natural_pdf-0.1.7.dist-info → natural_pdf-0.1.9.dist-info}/WHEEL +1 -1
  55. {natural_pdf-0.1.7.dist-info → natural_pdf-0.1.9.dist-info}/top_level.txt +0 -2
  56. docs/api/index.md +0 -386
  57. docs/assets/favicon.png +0 -3
  58. docs/assets/favicon.svg +0 -3
  59. docs/assets/javascripts/custom.js +0 -17
  60. docs/assets/logo.svg +0 -3
  61. docs/assets/sample-screen.png +0 -0
  62. docs/assets/social-preview.png +0 -17
  63. docs/assets/social-preview.svg +0 -17
  64. docs/assets/stylesheets/custom.css +0 -65
  65. docs/document-qa/index.ipynb +0 -435
  66. docs/document-qa/index.md +0 -79
  67. docs/element-selection/index.ipynb +0 -915
  68. docs/element-selection/index.md +0 -229
  69. docs/finetuning/index.md +0 -176
  70. docs/index.md +0 -170
  71. docs/installation/index.md +0 -69
  72. docs/interactive-widget/index.ipynb +0 -962
  73. docs/interactive-widget/index.md +0 -12
  74. docs/layout-analysis/index.ipynb +0 -818
  75. docs/layout-analysis/index.md +0 -185
  76. docs/ocr/index.md +0 -209
  77. docs/pdf-navigation/index.ipynb +0 -314
  78. docs/pdf-navigation/index.md +0 -97
  79. docs/regions/index.ipynb +0 -816
  80. docs/regions/index.md +0 -294
  81. docs/tables/index.ipynb +0 -658
  82. docs/tables/index.md +0 -144
  83. docs/text-analysis/index.ipynb +0 -370
  84. docs/text-analysis/index.md +0 -105
  85. docs/text-extraction/index.ipynb +0 -1478
  86. docs/text-extraction/index.md +0 -292
  87. docs/tutorials/01-loading-and-extraction.ipynb +0 -194
  88. docs/tutorials/01-loading-and-extraction.md +0 -95
  89. docs/tutorials/02-finding-elements.ipynb +0 -340
  90. docs/tutorials/02-finding-elements.md +0 -149
  91. docs/tutorials/03-extracting-blocks.ipynb +0 -147
  92. docs/tutorials/03-extracting-blocks.md +0 -48
  93. docs/tutorials/04-table-extraction.ipynb +0 -114
  94. docs/tutorials/04-table-extraction.md +0 -50
  95. docs/tutorials/05-excluding-content.ipynb +0 -270
  96. docs/tutorials/05-excluding-content.md +0 -109
  97. docs/tutorials/06-document-qa.ipynb +0 -332
  98. docs/tutorials/06-document-qa.md +0 -91
  99. docs/tutorials/07-layout-analysis.ipynb +0 -288
  100. docs/tutorials/07-layout-analysis.md +0 -66
  101. docs/tutorials/07-working-with-regions.ipynb +0 -413
  102. docs/tutorials/07-working-with-regions.md +0 -151
  103. docs/tutorials/08-spatial-navigation.ipynb +0 -508
  104. docs/tutorials/08-spatial-navigation.md +0 -190
  105. docs/tutorials/09-section-extraction.ipynb +0 -2434
  106. docs/tutorials/09-section-extraction.md +0 -256
  107. docs/tutorials/10-form-field-extraction.ipynb +0 -512
  108. docs/tutorials/10-form-field-extraction.md +0 -201
  109. docs/tutorials/11-enhanced-table-processing.ipynb +0 -54
  110. docs/tutorials/11-enhanced-table-processing.md +0 -9
  111. docs/tutorials/12-ocr-integration.ipynb +0 -604
  112. docs/tutorials/12-ocr-integration.md +0 -175
  113. docs/tutorials/13-semantic-search.ipynb +0 -1328
  114. docs/tutorials/13-semantic-search.md +0 -77
  115. docs/visual-debugging/index.ipynb +0 -2970
  116. docs/visual-debugging/index.md +0 -157
  117. docs/visual-debugging/region.png +0 -0
  118. natural_pdf/templates/finetune/fine_tune_paddleocr.md +0 -415
  119. natural_pdf/templates/spa/css/style.css +0 -334
  120. natural_pdf/templates/spa/index.html +0 -31
  121. natural_pdf/templates/spa/js/app.js +0 -472
  122. natural_pdf/templates/spa/words.txt +0 -235976
  123. natural_pdf/widgets/frontend/viewer.js +0 -88
  124. natural_pdf-0.1.7.dist-info/RECORD +0 -145
  125. notebooks/Examples.ipynb +0 -1293
  126. pdfs/.gitkeep +0 -0
  127. pdfs/01-practice.pdf +0 -543
  128. pdfs/0500000US42001.pdf +0 -0
  129. pdfs/0500000US42007.pdf +0 -0
  130. pdfs/2014 Statistics.pdf +0 -0
  131. pdfs/2019 Statistics.pdf +0 -0
  132. pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
  133. pdfs/needs-ocr.pdf +0 -0
  134. {natural_pdf-0.1.7.dist-info → natural_pdf-0.1.9.dist-info}/licenses/LICENSE +0 -0
@@ -1,1328 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "id": "6ba4c324",
6
- "metadata": {},
7
- "source": [
8
- "# Semantic Search Across Multiple Documents\n",
9
- "\n",
10
- "When working with a collection of PDFs, you might need to find information relevant to a specific query across all documents, not just within a single one. This tutorial demonstrates how to perform semantic search over a `PDFCollection`."
11
- ]
12
- },
13
- {
14
- "cell_type": "code",
15
- "execution_count": 1,
16
- "id": "573f47ae",
17
- "metadata": {
18
- "execution": {
19
- "iopub.execute_input": "2025-04-21T21:26:26.425833Z",
20
- "iopub.status.busy": "2025-04-21T21:26:26.425672Z",
21
- "iopub.status.idle": "2025-04-21T21:26:26.430590Z",
22
- "shell.execute_reply": "2025-04-21T21:26:26.430219Z"
23
- }
24
- },
25
- "outputs": [],
26
- "source": [
27
- "#%pip install \"natural-pdf[all]\"\n",
28
- "#%pip install \"natural-pdf[search]\" # Ensure search dependencies are installed"
29
- ]
30
- },
31
- {
32
- "cell_type": "code",
33
- "execution_count": 2,
34
- "id": "f40c7516",
35
- "metadata": {
36
- "execution": {
37
- "iopub.execute_input": "2025-04-21T21:26:26.432199Z",
38
- "iopub.status.busy": "2025-04-21T21:26:26.432080Z",
39
- "iopub.status.idle": "2025-04-21T21:26:33.413144Z",
40
- "shell.execute_reply": "2025-04-21T21:26:33.412658Z"
41
- }
42
- },
43
- "outputs": [
44
- {
45
- "name": "stderr",
46
- "output_type": "stream",
47
- "text": [
48
- "natural_pdf.collections.pdf_collection - INFO - Initializing 2 PDF objects...\n"
49
- ]
50
- },
51
- {
52
- "name": "stderr",
53
- "output_type": "stream",
54
- "text": [
55
- "\r",
56
- "Loading PDFs: 0%| | 0/2 [00:00<?, ?it/s]"
57
- ]
58
- },
59
- {
60
- "name": "stderr",
61
- "output_type": "stream",
62
- "text": [
63
- "natural_pdf.core.pdf - INFO - Downloading PDF from URL: https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/01-practice.pdf\n"
64
- ]
65
- },
66
- {
67
- "name": "stderr",
68
- "output_type": "stream",
69
- "text": [
70
- "natural_pdf.core.pdf - INFO - PDF downloaded to temporary file: /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp0qxzzh1m.pdf\n"
71
- ]
72
- },
73
- {
74
- "name": "stderr",
75
- "output_type": "stream",
76
- "text": [
77
- "natural_pdf.core.pdf - INFO - Initializing PDF from /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp0qxzzh1m.pdf\n"
78
- ]
79
- },
80
- {
81
- "name": "stderr",
82
- "output_type": "stream",
83
- "text": [
84
- "natural_pdf.ocr.ocr_manager - INFO - OCRManager initialized.\n"
85
- ]
86
- },
87
- {
88
- "name": "stderr",
89
- "output_type": "stream",
90
- "text": [
91
- "natural_pdf.analyzers.layout.layout_manager - INFO - LayoutManager initialized. Available engines: ['yolo', 'tatr', 'paddle', 'surya', 'docling', 'gemini']\n"
92
- ]
93
- },
94
- {
95
- "name": "stderr",
96
- "output_type": "stream",
97
- "text": [
98
- "natural_pdf.core.highlighting_service - INFO - HighlightingService initialized with ColorManager.\n"
99
- ]
100
- },
101
- {
102
- "name": "stderr",
103
- "output_type": "stream",
104
- "text": [
105
- "natural_pdf.core.pdf - INFO - Initialized HighlightingService.\n"
106
- ]
107
- },
108
- {
109
- "name": "stderr",
110
- "output_type": "stream",
111
- "text": [
112
- "natural_pdf.core.pdf - INFO - PDF 'https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/01-practice.pdf' initialized with 1 pages.\n"
113
- ]
114
- },
115
- {
116
- "name": "stderr",
117
- "output_type": "stream",
118
- "text": [
119
- "\r",
120
- "Loading PDFs: 50%|█████████████████████████████████████████▌ | 1/2 [00:00<00:00, 6.99it/s]"
121
- ]
122
- },
123
- {
124
- "name": "stderr",
125
- "output_type": "stream",
126
- "text": [
127
- "natural_pdf.core.pdf - INFO - Downloading PDF from URL: https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/Atlanta_Public_Schools_GA_sample.pdf\n"
128
- ]
129
- },
130
- {
131
- "name": "stderr",
132
- "output_type": "stream",
133
- "text": [
134
- "natural_pdf.core.pdf - INFO - PDF downloaded to temporary file: /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp67aw1giy.pdf\n"
135
- ]
136
- },
137
- {
138
- "name": "stderr",
139
- "output_type": "stream",
140
- "text": [
141
- "natural_pdf.core.pdf - INFO - Initializing PDF from /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp67aw1giy.pdf\n"
142
- ]
143
- },
144
- {
145
- "name": "stderr",
146
- "output_type": "stream",
147
- "text": [
148
- "natural_pdf.ocr.ocr_manager - INFO - OCRManager initialized.\n"
149
- ]
150
- },
151
- {
152
- "name": "stderr",
153
- "output_type": "stream",
154
- "text": [
155
- "natural_pdf.analyzers.layout.layout_manager - INFO - LayoutManager initialized. Available engines: ['yolo', 'tatr', 'paddle', 'surya', 'docling', 'gemini']\n"
156
- ]
157
- },
158
- {
159
- "name": "stderr",
160
- "output_type": "stream",
161
- "text": [
162
- "natural_pdf.core.highlighting_service - INFO - HighlightingService initialized with ColorManager.\n"
163
- ]
164
- },
165
- {
166
- "name": "stderr",
167
- "output_type": "stream",
168
- "text": [
169
- "natural_pdf.core.pdf - INFO - Initialized HighlightingService.\n"
170
- ]
171
- },
172
- {
173
- "name": "stderr",
174
- "output_type": "stream",
175
- "text": [
176
- "natural_pdf.core.pdf - INFO - PDF 'https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/Atlanta_Public_Schools_GA_sample.pdf' initialized with 5 pages.\n"
177
- ]
178
- },
179
- {
180
- "name": "stderr",
181
- "output_type": "stream",
182
- "text": [
183
- "\r",
184
- "Loading PDFs: 100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 8.19it/s]"
185
- ]
186
- },
187
- {
188
- "name": "stderr",
189
- "output_type": "stream",
190
- "text": [
191
- "\r",
192
- "Loading PDFs: 100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 7.97it/s]"
193
- ]
194
- },
195
- {
196
- "name": "stderr",
197
- "output_type": "stream",
198
- "text": [
199
- "\n",
200
- "natural_pdf.collections.pdf_collection - INFO - Successfully initialized 2 PDFs. Failed: 0\n"
201
- ]
202
- },
203
- {
204
- "name": "stdout",
205
- "output_type": "stream",
206
- "text": [
207
- "Created collection with 2 PDFs.\n"
208
- ]
209
- }
210
- ],
211
- "source": [
212
- "import logging\n",
213
- "import natural_pdf\n",
214
- "\n",
215
- "# Optional: Configure logging to see progress\n",
216
- "natural_pdf.configure_logging(level=logging.INFO)\n",
217
- "\n",
218
- "# Define the paths to your PDF files\n",
219
- "pdf_paths = [\n",
220
- " \"https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/01-practice.pdf\",\n",
221
- " \"https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/Atlanta_Public_Schools_GA_sample.pdf\"\n",
222
- " # Add more PDF paths as needed\n",
223
- "]\n",
224
- "\n",
225
- "# Create a PDFCollection\n",
226
- "collection = natural_pdf.PDFCollection(pdf_paths)\n",
227
- "print(f\"Created collection with {len(collection.pdfs)} PDFs.\")"
228
- ]
229
- },
230
- {
231
- "cell_type": "markdown",
232
- "id": "4137d1ea",
233
- "metadata": {},
234
- "source": [
235
- "## Initializing the Search Index\n",
236
- "\n",
237
- "Before performing a search, you need to initialize the search capabilities for the collection. This involves processing the documents and building an index."
238
- ]
239
- },
240
- {
241
- "cell_type": "code",
242
- "execution_count": 3,
243
- "id": "6b57f754",
244
- "metadata": {
245
- "execution": {
246
- "iopub.execute_input": "2025-04-21T21:26:33.416364Z",
247
- "iopub.status.busy": "2025-04-21T21:26:33.415349Z",
248
- "iopub.status.idle": "2025-04-21T21:26:36.015751Z",
249
- "shell.execute_reply": "2025-04-21T21:26:36.015429Z"
250
- }
251
- },
252
- "outputs": [
253
- {
254
- "name": "stderr",
255
- "output_type": "stream",
256
- "text": [
257
- "natural_pdf.search.searchable_mixin - INFO - Using default collection name 'default_collection' for in-memory service.\n"
258
- ]
259
- },
260
- {
261
- "name": "stderr",
262
- "output_type": "stream",
263
- "text": [
264
- "natural_pdf.search.searchable_mixin - INFO - Creating new SearchService: name='default_collection', persist=False, model=default\n"
265
- ]
266
- },
267
- {
268
- "name": "stderr",
269
- "output_type": "stream",
270
- "text": [
271
- "natural_pdf.search.haystack_search_service - INFO - HaystackSearchService initialized for collection='default_collection' (persist=False, model='sentence-transformers/all-MiniLM-L6-v2'). Default path: './natural_pdf_index'\n"
272
- ]
273
- },
274
- {
275
- "name": "stderr",
276
- "output_type": "stream",
277
- "text": [
278
- "natural_pdf.search - INFO - Created new HaystackSearchService instance for collection 'default_collection'.\n"
279
- ]
280
- },
281
- {
282
- "name": "stderr",
283
- "output_type": "stream",
284
- "text": [
285
- "natural_pdf.search.searchable_mixin - INFO - index=True: Proceeding to index collection immediately after search initialization.\n"
286
- ]
287
- },
288
- {
289
- "name": "stderr",
290
- "output_type": "stream",
291
- "text": [
292
- "natural_pdf.search.searchable_mixin - INFO - Starting internal indexing process into SearchService collection 'default_collection'...\n"
293
- ]
294
- },
295
- {
296
- "name": "stderr",
297
- "output_type": "stream",
298
- "text": [
299
- "natural_pdf.search.searchable_mixin - INFO - Prepared 6 indexable items for indexing.\n"
300
- ]
301
- },
302
- {
303
- "name": "stderr",
304
- "output_type": "stream",
305
- "text": [
306
- "natural_pdf.search.haystack_search_service - INFO - Index request for collection='default_collection', docs=6, model='sentence-transformers/all-MiniLM-L6-v2', force=False, persist=False\n"
307
- ]
308
- },
309
- {
310
- "name": "stderr",
311
- "output_type": "stream",
312
- "text": [
313
- "natural_pdf.search.haystack_search_service - INFO - Created SentenceTransformersDocumentEmbedder. Model: sentence-transformers/all-MiniLM-L6-v2, Device: ComponentDevice(_single_device=Device(type=<DeviceType.MPS: 'mps'>, id=None), _multiple_devices=None)\n"
314
- ]
315
- },
316
- {
317
- "name": "stderr",
318
- "output_type": "stream",
319
- "text": [
320
- "natural_pdf.search.haystack_search_service - INFO - Preparing Haystack Documents from 6 indexable items...\n"
321
- ]
322
- },
323
- {
324
- "name": "stderr",
325
- "output_type": "stream",
326
- "text": [
327
- "natural_pdf.search.haystack_search_service - INFO - Embedding 6 documents using 'sentence-transformers/all-MiniLM-L6-v2'...\n"
328
- ]
329
- },
330
- {
331
- "data": {
332
- "application/vnd.jupyter.widget-view+json": {
333
- "model_id": "cf2741c35c4e430a80c721891fe023d9",
334
- "version_major": 2,
335
- "version_minor": 0
336
- },
337
- "text/plain": [
338
- "Batches: 0%| | 0/1 [00:00<?, ?it/s]"
339
- ]
340
- },
341
- "metadata": {},
342
- "output_type": "display_data"
343
- },
344
- {
345
- "name": "stderr",
346
- "output_type": "stream",
347
- "text": [
348
- "natural_pdf.search.haystack_search_service - INFO - Successfully embedded 6 documents.\n"
349
- ]
350
- },
351
- {
352
- "name": "stderr",
353
- "output_type": "stream",
354
- "text": [
355
- "natural_pdf.search.haystack_search_service - INFO - Writing 6 embedded documents to store 'default_collection'...\n"
356
- ]
357
- },
358
- {
359
- "name": "stderr",
360
- "output_type": "stream",
361
- "text": [
362
- "natural_pdf.search.haystack_search_service - INFO - Successfully wrote 6 documents to store 'default_collection'.\n"
363
- ]
364
- },
365
- {
366
- "name": "stderr",
367
- "output_type": "stream",
368
- "text": [
369
- "natural_pdf.search.haystack_search_service - INFO - Store 'default_collection' document count after write: 6\n"
370
- ]
371
- },
372
- {
373
- "name": "stderr",
374
- "output_type": "stream",
375
- "text": [
376
- "natural_pdf.search.searchable_mixin - INFO - Successfully completed indexing into SearchService collection 'default_collection'.\n"
377
- ]
378
- },
379
- {
380
- "name": "stdout",
381
- "output_type": "stream",
382
- "text": [
383
- "Search index initialized.\n"
384
- ]
385
- }
386
- ],
387
- "source": [
388
- "# Initialize search. 'index=True' builds the index immediately.\n",
389
- "# This might take some time depending on the number and size of PDFs.\n",
390
- "collection.init_search(index=True) \n",
391
- "print(\"Search index initialized.\")"
392
- ]
393
- },
394
- {
395
- "cell_type": "markdown",
396
- "id": "e1e86b3b",
397
- "metadata": {},
398
- "source": [
399
- "## Performing a Semantic Search\n",
400
- "\n",
401
- "Once the index is ready, you can use the `find_relevant()` method to search for content semantically related to your query."
402
- ]
403
- },
404
- {
405
- "cell_type": "code",
406
- "execution_count": 4,
407
- "id": "0d5308cd",
408
- "metadata": {
409
- "execution": {
410
- "iopub.execute_input": "2025-04-21T21:26:36.017436Z",
411
- "iopub.status.busy": "2025-04-21T21:26:36.017297Z",
412
- "iopub.status.idle": "2025-04-21T21:26:36.167418Z",
413
- "shell.execute_reply": "2025-04-21T21:26:36.167083Z"
414
- }
415
- },
416
- "outputs": [
417
- {
418
- "name": "stderr",
419
- "output_type": "stream",
420
- "text": [
421
- "natural_pdf.search.searchable_mixin - INFO - Searching collection 'default_collection' via HaystackSearchService...\n"
422
- ]
423
- },
424
- {
425
- "name": "stderr",
426
- "output_type": "stream",
427
- "text": [
428
- "natural_pdf.search.haystack_search_service - INFO - Search request for collection='default_collection', query_type=str, options=TextSearchOptions(top_k=10, retriever_top_k=20, filters=None, use_reranker=True, reranker_instance=None, reranker_model=None, reranker_api_key=None)\n"
429
- ]
430
- },
431
- {
432
- "name": "stderr",
433
- "output_type": "stream",
434
- "text": [
435
- "natural_pdf.search.haystack_search_service - INFO - Created SentenceTransformersTextEmbedder. Model: sentence-transformers/all-MiniLM-L6-v2, Device: ComponentDevice(_single_device=Device(type=<DeviceType.MPS: 'mps'>, id=None), _multiple_devices=None)\n"
436
- ]
437
- },
438
- {
439
- "data": {
440
- "application/vnd.jupyter.widget-view+json": {
441
- "model_id": "4fe0753b29774d8daf7ecb3f89719349",
442
- "version_major": 2,
443
- "version_minor": 0
444
- },
445
- "text/plain": [
446
- "Batches: 0%| | 0/1 [00:00<?, ?it/s]"
447
- ]
448
- },
449
- "metadata": {},
450
- "output_type": "display_data"
451
- },
452
- {
453
- "name": "stderr",
454
- "output_type": "stream",
455
- "text": [
456
- "natural_pdf.search.haystack_search_service - INFO - Running retrieval pipeline for collection 'default_collection'...\n"
457
- ]
458
- },
459
- {
460
- "name": "stderr",
461
- "output_type": "stream",
462
- "text": [
463
- "natural_pdf.search.haystack_search_service - INFO - Retrieved 6 documents.\n"
464
- ]
465
- },
466
- {
467
- "name": "stderr",
468
- "output_type": "stream",
469
- "text": [
470
- "natural_pdf.search.searchable_mixin - INFO - SearchService returned 6 results from collection 'default_collection'.\n"
471
- ]
472
- },
473
- {
474
- "name": "stdout",
475
- "output_type": "stream",
476
- "text": [
477
- "Found 6 results for 'american president':\n"
478
- ]
479
- }
480
- ],
481
- "source": [
482
- "# Perform a search query\n",
483
- "query = \"american president\"\n",
484
- "results = collection.find_relevant(query)\n",
485
- "\n",
486
- "print(f\"Found {len(results)} results for '{query}':\")"
487
- ]
488
- },
489
- {
490
- "cell_type": "markdown",
491
- "id": "fe6158f2",
492
- "metadata": {},
493
- "source": [
494
- "## Understanding Search Results\n",
495
- "\n",
496
- "The `find_relevant()` method returns a list of dictionaries, each representing a relevant text chunk found in one of the PDFs. Each result includes:\n",
497
- "\n",
498
- "* `pdf_path`: The path to the PDF document where the result was found.\n",
499
- "* `page_number`: The page number within the PDF.\n",
500
- "* `score`: A relevance score (higher means more relevant).\n",
501
- "* `content_snippet`: A snippet of the text chunk that matched the query."
502
- ]
503
- },
504
- {
505
- "cell_type": "code",
506
- "execution_count": 5,
507
- "id": "5ed7a221",
508
- "metadata": {
509
- "execution": {
510
- "iopub.execute_input": "2025-04-21T21:26:36.168858Z",
511
- "iopub.status.busy": "2025-04-21T21:26:36.168719Z",
512
- "iopub.status.idle": "2025-04-21T21:26:36.171586Z",
513
- "shell.execute_reply": "2025-04-21T21:26:36.171300Z"
514
- }
515
- },
516
- "outputs": [
517
- {
518
- "name": "stdout",
519
- "output_type": "stream",
520
- "text": [
521
- " 1. PDF: /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp67aw1giy.pdf\n",
522
- " Page: 2 (Score: 0.0708)\n",
523
- " Snippet: \n",
524
- " \n",
525
- " Library Weeding Log ...\n",
526
- " 2. PDF: /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp67aw1giy.pdf\n",
527
- " Page: 5 (Score: 0.0669)\n",
528
- " Snippet: \n",
529
- " \n",
530
- " Library Weeding Log ...\n",
531
- " 3. PDF: /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp0qxzzh1m.pdf\n",
532
- " Page: 1 (Score: -0.0040)\n",
533
- " Snippet: \n",
534
- " \n",
535
- " ...\n",
536
- " 4. PDF: /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp67aw1giy.pdf\n",
537
- " Page: 4 (Score: -0.0245)\n",
538
- " Snippet: \n",
539
- " \n",
540
- " Library Weeding Log ...\n",
541
- " 5. PDF: /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp67aw1giy.pdf\n",
542
- " Page: 3 (Score: -0.0445)\n",
543
- " Snippet: \n",
544
- " \n",
545
- " Library Weeding Log ...\n",
546
- " 6. PDF: /var/folders/25/h3prywj14qb0mlkl2s8bxq5m0000gn/T/tmp67aw1giy.pdf\n",
547
- " Page: 1 (Score: -0.0473)\n",
548
- " Snippet: \n",
549
- " \n",
550
- " Library Weeding Log ...\n"
551
- ]
552
- }
553
- ],
554
- "source": [
555
- "# Process and display the results\n",
556
- "if results:\n",
557
- " for i, result in enumerate(results):\n",
558
- " print(f\" {i+1}. PDF: {result['pdf_path']}\")\n",
559
- " print(f\" Page: {result['page_number']} (Score: {result['score']:.4f})\")\n",
560
- " # Display a snippet of the content\n",
561
- " snippet = result.get('content_snippet', '')\n",
562
- " print(f\" Snippet: {snippet}...\") \n",
563
- "else:\n",
564
- " print(\" No relevant results found.\")\n",
565
- "\n",
566
- "# You can access the full content if needed via the result object, \n",
567
- "# though 'content_snippet' is usually sufficient for display."
568
- ]
569
- },
570
- {
571
- "cell_type": "markdown",
572
- "id": "1c628c5f",
573
- "metadata": {},
574
- "source": [
575
- "Semantic search allows you to efficiently query large sets of documents to find the most relevant information without needing exact keyword matches, leveraging the meaning and context of your query. "
576
- ]
577
- }
578
- ],
579
- "metadata": {
580
- "jupytext": {
581
- "cell_metadata_filter": "-all",
582
- "main_language": "python",
583
- "notebook_metadata_filter": "-all"
584
- },
585
- "language_info": {
586
- "codemirror_mode": {
587
- "name": "ipython",
588
- "version": 3
589
- },
590
- "file_extension": ".py",
591
- "mimetype": "text/x-python",
592
- "name": "python",
593
- "nbconvert_exporter": "python",
594
- "pygments_lexer": "ipython3",
595
- "version": "3.10.13"
596
- },
597
- "widgets": {
598
- "application/vnd.jupyter.widget-state+json": {
599
- "state": {
600
- "026c1486481a4a5cb64c3e3e4dfea542": {
601
- "model_module": "@jupyter-widgets/controls",
602
- "model_module_version": "2.0.0",
603
- "model_name": "HTMLStyleModel",
604
- "state": {
605
- "_model_module": "@jupyter-widgets/controls",
606
- "_model_module_version": "2.0.0",
607
- "_model_name": "HTMLStyleModel",
608
- "_view_count": null,
609
- "_view_module": "@jupyter-widgets/base",
610
- "_view_module_version": "2.0.0",
611
- "_view_name": "StyleView",
612
- "background": null,
613
- "description_width": "",
614
- "font_size": null,
615
- "text_color": null
616
- }
617
- },
618
- "0cecc462036e46ceabe7e0bbf41f086c": {
619
- "model_module": "@jupyter-widgets/base",
620
- "model_module_version": "2.0.0",
621
- "model_name": "LayoutModel",
622
- "state": {
623
- "_model_module": "@jupyter-widgets/base",
624
- "_model_module_version": "2.0.0",
625
- "_model_name": "LayoutModel",
626
- "_view_count": null,
627
- "_view_module": "@jupyter-widgets/base",
628
- "_view_module_version": "2.0.0",
629
- "_view_name": "LayoutView",
630
- "align_content": null,
631
- "align_items": null,
632
- "align_self": null,
633
- "border_bottom": null,
634
- "border_left": null,
635
- "border_right": null,
636
- "border_top": null,
637
- "bottom": null,
638
- "display": null,
639
- "flex": null,
640
- "flex_flow": null,
641
- "grid_area": null,
642
- "grid_auto_columns": null,
643
- "grid_auto_flow": null,
644
- "grid_auto_rows": null,
645
- "grid_column": null,
646
- "grid_gap": null,
647
- "grid_row": null,
648
- "grid_template_areas": null,
649
- "grid_template_columns": null,
650
- "grid_template_rows": null,
651
- "height": null,
652
- "justify_content": null,
653
- "justify_items": null,
654
- "left": null,
655
- "margin": null,
656
- "max_height": null,
657
- "max_width": null,
658
- "min_height": null,
659
- "min_width": null,
660
- "object_fit": null,
661
- "object_position": null,
662
- "order": null,
663
- "overflow": null,
664
- "padding": null,
665
- "right": null,
666
- "top": null,
667
- "visibility": null,
668
- "width": null
669
- }
670
- },
671
- "1c916b704c77462ebf088c5b45543e05": {
672
- "model_module": "@jupyter-widgets/controls",
673
- "model_module_version": "2.0.0",
674
- "model_name": "HTMLModel",
675
- "state": {
676
- "_dom_classes": [],
677
- "_model_module": "@jupyter-widgets/controls",
678
- "_model_module_version": "2.0.0",
679
- "_model_name": "HTMLModel",
680
- "_view_count": null,
681
- "_view_module": "@jupyter-widgets/controls",
682
- "_view_module_version": "2.0.0",
683
- "_view_name": "HTMLView",
684
- "description": "",
685
- "description_allow_html": false,
686
- "layout": "IPY_MODEL_bbd4b71814b24819bc02e8a3ae7cf936",
687
- "placeholder": "​",
688
- "style": "IPY_MODEL_026c1486481a4a5cb64c3e3e4dfea542",
689
- "tabbable": null,
690
- "tooltip": null,
691
- "value": "Batches: 100%"
692
- }
693
- },
694
- "23da5426a33a4f2e8eba26ae5371beb0": {
695
- "model_module": "@jupyter-widgets/controls",
696
- "model_module_version": "2.0.0",
697
- "model_name": "FloatProgressModel",
698
- "state": {
699
- "_dom_classes": [],
700
- "_model_module": "@jupyter-widgets/controls",
701
- "_model_module_version": "2.0.0",
702
- "_model_name": "FloatProgressModel",
703
- "_view_count": null,
704
- "_view_module": "@jupyter-widgets/controls",
705
- "_view_module_version": "2.0.0",
706
- "_view_name": "ProgressView",
707
- "bar_style": "success",
708
- "description": "",
709
- "description_allow_html": false,
710
- "layout": "IPY_MODEL_b9c7a538fab94ea08463c82c24344a64",
711
- "max": 1.0,
712
- "min": 0.0,
713
- "orientation": "horizontal",
714
- "style": "IPY_MODEL_f58162f746b140a8bc38b978a971b841",
715
- "tabbable": null,
716
- "tooltip": null,
717
- "value": 1.0
718
- }
719
- },
720
- "327031d1fca04e19ba9fb925ea1f07af": {
721
- "model_module": "@jupyter-widgets/controls",
722
- "model_module_version": "2.0.0",
723
- "model_name": "HTMLModel",
724
- "state": {
725
- "_dom_classes": [],
726
- "_model_module": "@jupyter-widgets/controls",
727
- "_model_module_version": "2.0.0",
728
- "_model_name": "HTMLModel",
729
- "_view_count": null,
730
- "_view_module": "@jupyter-widgets/controls",
731
- "_view_module_version": "2.0.0",
732
- "_view_name": "HTMLView",
733
- "description": "",
734
- "description_allow_html": false,
735
- "layout": "IPY_MODEL_dca88061cb6f43748dcdc1132ecccc90",
736
- "placeholder": "​",
737
- "style": "IPY_MODEL_675d17109fee467495b1e03599ac9261",
738
- "tabbable": null,
739
- "tooltip": null,
740
- "value": " 1/1 [00:00&lt;00:00,  7.20it/s]"
741
- }
742
- },
743
- "3ccf196c0d0d4dd089991bd63d435c4d": {
744
- "model_module": "@jupyter-widgets/base",
745
- "model_module_version": "2.0.0",
746
- "model_name": "LayoutModel",
747
- "state": {
748
- "_model_module": "@jupyter-widgets/base",
749
- "_model_module_version": "2.0.0",
750
- "_model_name": "LayoutModel",
751
- "_view_count": null,
752
- "_view_module": "@jupyter-widgets/base",
753
- "_view_module_version": "2.0.0",
754
- "_view_name": "LayoutView",
755
- "align_content": null,
756
- "align_items": null,
757
- "align_self": null,
758
- "border_bottom": null,
759
- "border_left": null,
760
- "border_right": null,
761
- "border_top": null,
762
- "bottom": null,
763
- "display": null,
764
- "flex": null,
765
- "flex_flow": null,
766
- "grid_area": null,
767
- "grid_auto_columns": null,
768
- "grid_auto_flow": null,
769
- "grid_auto_rows": null,
770
- "grid_column": null,
771
- "grid_gap": null,
772
- "grid_row": null,
773
- "grid_template_areas": null,
774
- "grid_template_columns": null,
775
- "grid_template_rows": null,
776
- "height": null,
777
- "justify_content": null,
778
- "justify_items": null,
779
- "left": null,
780
- "margin": null,
781
- "max_height": null,
782
- "max_width": null,
783
- "min_height": null,
784
- "min_width": null,
785
- "object_fit": null,
786
- "object_position": null,
787
- "order": null,
788
- "overflow": null,
789
- "padding": null,
790
- "right": null,
791
- "top": null,
792
- "visibility": null,
793
- "width": null
794
- }
795
- },
796
- "4fe0753b29774d8daf7ecb3f89719349": {
797
- "model_module": "@jupyter-widgets/controls",
798
- "model_module_version": "2.0.0",
799
- "model_name": "HBoxModel",
800
- "state": {
801
- "_dom_classes": [],
802
- "_model_module": "@jupyter-widgets/controls",
803
- "_model_module_version": "2.0.0",
804
- "_model_name": "HBoxModel",
805
- "_view_count": null,
806
- "_view_module": "@jupyter-widgets/controls",
807
- "_view_module_version": "2.0.0",
808
- "_view_name": "HBoxView",
809
- "box_style": "",
810
- "children": [
811
- "IPY_MODEL_8aae53bc7d2d495ca6eb05f98d6fc8da",
812
- "IPY_MODEL_23da5426a33a4f2e8eba26ae5371beb0",
813
- "IPY_MODEL_327031d1fca04e19ba9fb925ea1f07af"
814
- ],
815
- "layout": "IPY_MODEL_f991f9535ca04bc2bee7f1964f1132db",
816
- "tabbable": null,
817
- "tooltip": null
818
- }
819
- },
820
- "548fdb05df104726991dc64580769247": {
821
- "model_module": "@jupyter-widgets/controls",
822
- "model_module_version": "2.0.0",
823
- "model_name": "HTMLStyleModel",
824
- "state": {
825
- "_model_module": "@jupyter-widgets/controls",
826
- "_model_module_version": "2.0.0",
827
- "_model_name": "HTMLStyleModel",
828
- "_view_count": null,
829
- "_view_module": "@jupyter-widgets/base",
830
- "_view_module_version": "2.0.0",
831
- "_view_name": "StyleView",
832
- "background": null,
833
- "description_width": "",
834
- "font_size": null,
835
- "text_color": null
836
- }
837
- },
838
- "675d17109fee467495b1e03599ac9261": {
839
- "model_module": "@jupyter-widgets/controls",
840
- "model_module_version": "2.0.0",
841
- "model_name": "HTMLStyleModel",
842
- "state": {
843
- "_model_module": "@jupyter-widgets/controls",
844
- "_model_module_version": "2.0.0",
845
- "_model_name": "HTMLStyleModel",
846
- "_view_count": null,
847
- "_view_module": "@jupyter-widgets/base",
848
- "_view_module_version": "2.0.0",
849
- "_view_name": "StyleView",
850
- "background": null,
851
- "description_width": "",
852
- "font_size": null,
853
- "text_color": null
854
- }
855
- },
856
- "7ce5089ebf82424abba28acc0ce522da": {
857
- "model_module": "@jupyter-widgets/controls",
858
- "model_module_version": "2.0.0",
859
- "model_name": "ProgressStyleModel",
860
- "state": {
861
- "_model_module": "@jupyter-widgets/controls",
862
- "_model_module_version": "2.0.0",
863
- "_model_name": "ProgressStyleModel",
864
- "_view_count": null,
865
- "_view_module": "@jupyter-widgets/base",
866
- "_view_module_version": "2.0.0",
867
- "_view_name": "StyleView",
868
- "bar_color": null,
869
- "description_width": ""
870
- }
871
- },
872
- "8328a5952f5645f3b31ab5351fb16a9a": {
873
- "model_module": "@jupyter-widgets/controls",
874
- "model_module_version": "2.0.0",
875
- "model_name": "FloatProgressModel",
876
- "state": {
877
- "_dom_classes": [],
878
- "_model_module": "@jupyter-widgets/controls",
879
- "_model_module_version": "2.0.0",
880
- "_model_name": "FloatProgressModel",
881
- "_view_count": null,
882
- "_view_module": "@jupyter-widgets/controls",
883
- "_view_module_version": "2.0.0",
884
- "_view_name": "ProgressView",
885
- "bar_style": "success",
886
- "description": "",
887
- "description_allow_html": false,
888
- "layout": "IPY_MODEL_96691282e844404ca0686f8777371ba9",
889
- "max": 1.0,
890
- "min": 0.0,
891
- "orientation": "horizontal",
892
- "style": "IPY_MODEL_7ce5089ebf82424abba28acc0ce522da",
893
- "tabbable": null,
894
- "tooltip": null,
895
- "value": 1.0
896
- }
897
- },
898
- "8aae53bc7d2d495ca6eb05f98d6fc8da": {
899
- "model_module": "@jupyter-widgets/controls",
900
- "model_module_version": "2.0.0",
901
- "model_name": "HTMLModel",
902
- "state": {
903
- "_dom_classes": [],
904
- "_model_module": "@jupyter-widgets/controls",
905
- "_model_module_version": "2.0.0",
906
- "_model_name": "HTMLModel",
907
- "_view_count": null,
908
- "_view_module": "@jupyter-widgets/controls",
909
- "_view_module_version": "2.0.0",
910
- "_view_name": "HTMLView",
911
- "description": "",
912
- "description_allow_html": false,
913
- "layout": "IPY_MODEL_8e70848dd5404708b60063a404310668",
914
- "placeholder": "​",
915
- "style": "IPY_MODEL_b9f037775be645de95f0c49ce550385a",
916
- "tabbable": null,
917
- "tooltip": null,
918
- "value": "Batches: 100%"
919
- }
920
- },
921
- "8e70848dd5404708b60063a404310668": {
922
- "model_module": "@jupyter-widgets/base",
923
- "model_module_version": "2.0.0",
924
- "model_name": "LayoutModel",
925
- "state": {
926
- "_model_module": "@jupyter-widgets/base",
927
- "_model_module_version": "2.0.0",
928
- "_model_name": "LayoutModel",
929
- "_view_count": null,
930
- "_view_module": "@jupyter-widgets/base",
931
- "_view_module_version": "2.0.0",
932
- "_view_name": "LayoutView",
933
- "align_content": null,
934
- "align_items": null,
935
- "align_self": null,
936
- "border_bottom": null,
937
- "border_left": null,
938
- "border_right": null,
939
- "border_top": null,
940
- "bottom": null,
941
- "display": null,
942
- "flex": null,
943
- "flex_flow": null,
944
- "grid_area": null,
945
- "grid_auto_columns": null,
946
- "grid_auto_flow": null,
947
- "grid_auto_rows": null,
948
- "grid_column": null,
949
- "grid_gap": null,
950
- "grid_row": null,
951
- "grid_template_areas": null,
952
- "grid_template_columns": null,
953
- "grid_template_rows": null,
954
- "height": null,
955
- "justify_content": null,
956
- "justify_items": null,
957
- "left": null,
958
- "margin": null,
959
- "max_height": null,
960
- "max_width": null,
961
- "min_height": null,
962
- "min_width": null,
963
- "object_fit": null,
964
- "object_position": null,
965
- "order": null,
966
- "overflow": null,
967
- "padding": null,
968
- "right": null,
969
- "top": null,
970
- "visibility": null,
971
- "width": null
972
- }
973
- },
974
- "96691282e844404ca0686f8777371ba9": {
975
- "model_module": "@jupyter-widgets/base",
976
- "model_module_version": "2.0.0",
977
- "model_name": "LayoutModel",
978
- "state": {
979
- "_model_module": "@jupyter-widgets/base",
980
- "_model_module_version": "2.0.0",
981
- "_model_name": "LayoutModel",
982
- "_view_count": null,
983
- "_view_module": "@jupyter-widgets/base",
984
- "_view_module_version": "2.0.0",
985
- "_view_name": "LayoutView",
986
- "align_content": null,
987
- "align_items": null,
988
- "align_self": null,
989
- "border_bottom": null,
990
- "border_left": null,
991
- "border_right": null,
992
- "border_top": null,
993
- "bottom": null,
994
- "display": null,
995
- "flex": null,
996
- "flex_flow": null,
997
- "grid_area": null,
998
- "grid_auto_columns": null,
999
- "grid_auto_flow": null,
1000
- "grid_auto_rows": null,
1001
- "grid_column": null,
1002
- "grid_gap": null,
1003
- "grid_row": null,
1004
- "grid_template_areas": null,
1005
- "grid_template_columns": null,
1006
- "grid_template_rows": null,
1007
- "height": null,
1008
- "justify_content": null,
1009
- "justify_items": null,
1010
- "left": null,
1011
- "margin": null,
1012
- "max_height": null,
1013
- "max_width": null,
1014
- "min_height": null,
1015
- "min_width": null,
1016
- "object_fit": null,
1017
- "object_position": null,
1018
- "order": null,
1019
- "overflow": null,
1020
- "padding": null,
1021
- "right": null,
1022
- "top": null,
1023
- "visibility": null,
1024
- "width": null
1025
- }
1026
- },
1027
- "b9c7a538fab94ea08463c82c24344a64": {
1028
- "model_module": "@jupyter-widgets/base",
1029
- "model_module_version": "2.0.0",
1030
- "model_name": "LayoutModel",
1031
- "state": {
1032
- "_model_module": "@jupyter-widgets/base",
1033
- "_model_module_version": "2.0.0",
1034
- "_model_name": "LayoutModel",
1035
- "_view_count": null,
1036
- "_view_module": "@jupyter-widgets/base",
1037
- "_view_module_version": "2.0.0",
1038
- "_view_name": "LayoutView",
1039
- "align_content": null,
1040
- "align_items": null,
1041
- "align_self": null,
1042
- "border_bottom": null,
1043
- "border_left": null,
1044
- "border_right": null,
1045
- "border_top": null,
1046
- "bottom": null,
1047
- "display": null,
1048
- "flex": null,
1049
- "flex_flow": null,
1050
- "grid_area": null,
1051
- "grid_auto_columns": null,
1052
- "grid_auto_flow": null,
1053
- "grid_auto_rows": null,
1054
- "grid_column": null,
1055
- "grid_gap": null,
1056
- "grid_row": null,
1057
- "grid_template_areas": null,
1058
- "grid_template_columns": null,
1059
- "grid_template_rows": null,
1060
- "height": null,
1061
- "justify_content": null,
1062
- "justify_items": null,
1063
- "left": null,
1064
- "margin": null,
1065
- "max_height": null,
1066
- "max_width": null,
1067
- "min_height": null,
1068
- "min_width": null,
1069
- "object_fit": null,
1070
- "object_position": null,
1071
- "order": null,
1072
- "overflow": null,
1073
- "padding": null,
1074
- "right": null,
1075
- "top": null,
1076
- "visibility": null,
1077
- "width": null
1078
- }
1079
- },
1080
- "b9f037775be645de95f0c49ce550385a": {
1081
- "model_module": "@jupyter-widgets/controls",
1082
- "model_module_version": "2.0.0",
1083
- "model_name": "HTMLStyleModel",
1084
- "state": {
1085
- "_model_module": "@jupyter-widgets/controls",
1086
- "_model_module_version": "2.0.0",
1087
- "_model_name": "HTMLStyleModel",
1088
- "_view_count": null,
1089
- "_view_module": "@jupyter-widgets/base",
1090
- "_view_module_version": "2.0.0",
1091
- "_view_name": "StyleView",
1092
- "background": null,
1093
- "description_width": "",
1094
- "font_size": null,
1095
- "text_color": null
1096
- }
1097
- },
1098
- "bbd4b71814b24819bc02e8a3ae7cf936": {
1099
- "model_module": "@jupyter-widgets/base",
1100
- "model_module_version": "2.0.0",
1101
- "model_name": "LayoutModel",
1102
- "state": {
1103
- "_model_module": "@jupyter-widgets/base",
1104
- "_model_module_version": "2.0.0",
1105
- "_model_name": "LayoutModel",
1106
- "_view_count": null,
1107
- "_view_module": "@jupyter-widgets/base",
1108
- "_view_module_version": "2.0.0",
1109
- "_view_name": "LayoutView",
1110
- "align_content": null,
1111
- "align_items": null,
1112
- "align_self": null,
1113
- "border_bottom": null,
1114
- "border_left": null,
1115
- "border_right": null,
1116
- "border_top": null,
1117
- "bottom": null,
1118
- "display": null,
1119
- "flex": null,
1120
- "flex_flow": null,
1121
- "grid_area": null,
1122
- "grid_auto_columns": null,
1123
- "grid_auto_flow": null,
1124
- "grid_auto_rows": null,
1125
- "grid_column": null,
1126
- "grid_gap": null,
1127
- "grid_row": null,
1128
- "grid_template_areas": null,
1129
- "grid_template_columns": null,
1130
- "grid_template_rows": null,
1131
- "height": null,
1132
- "justify_content": null,
1133
- "justify_items": null,
1134
- "left": null,
1135
- "margin": null,
1136
- "max_height": null,
1137
- "max_width": null,
1138
- "min_height": null,
1139
- "min_width": null,
1140
- "object_fit": null,
1141
- "object_position": null,
1142
- "order": null,
1143
- "overflow": null,
1144
- "padding": null,
1145
- "right": null,
1146
- "top": null,
1147
- "visibility": null,
1148
- "width": null
1149
- }
1150
- },
1151
- "cf2741c35c4e430a80c721891fe023d9": {
1152
- "model_module": "@jupyter-widgets/controls",
1153
- "model_module_version": "2.0.0",
1154
- "model_name": "HBoxModel",
1155
- "state": {
1156
- "_dom_classes": [],
1157
- "_model_module": "@jupyter-widgets/controls",
1158
- "_model_module_version": "2.0.0",
1159
- "_model_name": "HBoxModel",
1160
- "_view_count": null,
1161
- "_view_module": "@jupyter-widgets/controls",
1162
- "_view_module_version": "2.0.0",
1163
- "_view_name": "HBoxView",
1164
- "box_style": "",
1165
- "children": [
1166
- "IPY_MODEL_1c916b704c77462ebf088c5b45543e05",
1167
- "IPY_MODEL_8328a5952f5645f3b31ab5351fb16a9a",
1168
- "IPY_MODEL_e567efbe84ed461d85b57fc9d69d9d9c"
1169
- ],
1170
- "layout": "IPY_MODEL_0cecc462036e46ceabe7e0bbf41f086c",
1171
- "tabbable": null,
1172
- "tooltip": null
1173
- }
1174
- },
1175
- "dca88061cb6f43748dcdc1132ecccc90": {
1176
- "model_module": "@jupyter-widgets/base",
1177
- "model_module_version": "2.0.0",
1178
- "model_name": "LayoutModel",
1179
- "state": {
1180
- "_model_module": "@jupyter-widgets/base",
1181
- "_model_module_version": "2.0.0",
1182
- "_model_name": "LayoutModel",
1183
- "_view_count": null,
1184
- "_view_module": "@jupyter-widgets/base",
1185
- "_view_module_version": "2.0.0",
1186
- "_view_name": "LayoutView",
1187
- "align_content": null,
1188
- "align_items": null,
1189
- "align_self": null,
1190
- "border_bottom": null,
1191
- "border_left": null,
1192
- "border_right": null,
1193
- "border_top": null,
1194
- "bottom": null,
1195
- "display": null,
1196
- "flex": null,
1197
- "flex_flow": null,
1198
- "grid_area": null,
1199
- "grid_auto_columns": null,
1200
- "grid_auto_flow": null,
1201
- "grid_auto_rows": null,
1202
- "grid_column": null,
1203
- "grid_gap": null,
1204
- "grid_row": null,
1205
- "grid_template_areas": null,
1206
- "grid_template_columns": null,
1207
- "grid_template_rows": null,
1208
- "height": null,
1209
- "justify_content": null,
1210
- "justify_items": null,
1211
- "left": null,
1212
- "margin": null,
1213
- "max_height": null,
1214
- "max_width": null,
1215
- "min_height": null,
1216
- "min_width": null,
1217
- "object_fit": null,
1218
- "object_position": null,
1219
- "order": null,
1220
- "overflow": null,
1221
- "padding": null,
1222
- "right": null,
1223
- "top": null,
1224
- "visibility": null,
1225
- "width": null
1226
- }
1227
- },
1228
- "e567efbe84ed461d85b57fc9d69d9d9c": {
1229
- "model_module": "@jupyter-widgets/controls",
1230
- "model_module_version": "2.0.0",
1231
- "model_name": "HTMLModel",
1232
- "state": {
1233
- "_dom_classes": [],
1234
- "_model_module": "@jupyter-widgets/controls",
1235
- "_model_module_version": "2.0.0",
1236
- "_model_name": "HTMLModel",
1237
- "_view_count": null,
1238
- "_view_module": "@jupyter-widgets/controls",
1239
- "_view_module_version": "2.0.0",
1240
- "_view_name": "HTMLView",
1241
- "description": "",
1242
- "description_allow_html": false,
1243
- "layout": "IPY_MODEL_3ccf196c0d0d4dd089991bd63d435c4d",
1244
- "placeholder": "​",
1245
- "style": "IPY_MODEL_548fdb05df104726991dc64580769247",
1246
- "tabbable": null,
1247
- "tooltip": null,
1248
- "value": " 1/1 [00:00&lt;00:00,  4.09it/s]"
1249
- }
1250
- },
1251
- "f58162f746b140a8bc38b978a971b841": {
1252
- "model_module": "@jupyter-widgets/controls",
1253
- "model_module_version": "2.0.0",
1254
- "model_name": "ProgressStyleModel",
1255
- "state": {
1256
- "_model_module": "@jupyter-widgets/controls",
1257
- "_model_module_version": "2.0.0",
1258
- "_model_name": "ProgressStyleModel",
1259
- "_view_count": null,
1260
- "_view_module": "@jupyter-widgets/base",
1261
- "_view_module_version": "2.0.0",
1262
- "_view_name": "StyleView",
1263
- "bar_color": null,
1264
- "description_width": ""
1265
- }
1266
- },
1267
- "f991f9535ca04bc2bee7f1964f1132db": {
1268
- "model_module": "@jupyter-widgets/base",
1269
- "model_module_version": "2.0.0",
1270
- "model_name": "LayoutModel",
1271
- "state": {
1272
- "_model_module": "@jupyter-widgets/base",
1273
- "_model_module_version": "2.0.0",
1274
- "_model_name": "LayoutModel",
1275
- "_view_count": null,
1276
- "_view_module": "@jupyter-widgets/base",
1277
- "_view_module_version": "2.0.0",
1278
- "_view_name": "LayoutView",
1279
- "align_content": null,
1280
- "align_items": null,
1281
- "align_self": null,
1282
- "border_bottom": null,
1283
- "border_left": null,
1284
- "border_right": null,
1285
- "border_top": null,
1286
- "bottom": null,
1287
- "display": null,
1288
- "flex": null,
1289
- "flex_flow": null,
1290
- "grid_area": null,
1291
- "grid_auto_columns": null,
1292
- "grid_auto_flow": null,
1293
- "grid_auto_rows": null,
1294
- "grid_column": null,
1295
- "grid_gap": null,
1296
- "grid_row": null,
1297
- "grid_template_areas": null,
1298
- "grid_template_columns": null,
1299
- "grid_template_rows": null,
1300
- "height": null,
1301
- "justify_content": null,
1302
- "justify_items": null,
1303
- "left": null,
1304
- "margin": null,
1305
- "max_height": null,
1306
- "max_width": null,
1307
- "min_height": null,
1308
- "min_width": null,
1309
- "object_fit": null,
1310
- "object_position": null,
1311
- "order": null,
1312
- "overflow": null,
1313
- "padding": null,
1314
- "right": null,
1315
- "top": null,
1316
- "visibility": null,
1317
- "width": null
1318
- }
1319
- }
1320
- },
1321
- "version_major": 2,
1322
- "version_minor": 0
1323
- }
1324
- }
1325
- },
1326
- "nbformat": 4,
1327
- "nbformat_minor": 5
1328
- }