natural-pdf 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/__init__.py +1 -0
- natural_pdf/analyzers/layout/base.py +1 -5
- natural_pdf/analyzers/layout/gemini.py +61 -51
- natural_pdf/analyzers/layout/layout_analyzer.py +40 -11
- natural_pdf/analyzers/layout/layout_manager.py +26 -84
- natural_pdf/analyzers/layout/layout_options.py +7 -0
- natural_pdf/analyzers/layout/pdfplumber_table_finder.py +142 -0
- natural_pdf/analyzers/layout/surya.py +46 -123
- natural_pdf/analyzers/layout/tatr.py +51 -4
- natural_pdf/analyzers/text_structure.py +3 -5
- natural_pdf/analyzers/utils.py +3 -3
- natural_pdf/classification/manager.py +241 -158
- natural_pdf/classification/mixin.py +52 -38
- natural_pdf/classification/results.py +71 -45
- natural_pdf/collections/mixins.py +85 -20
- natural_pdf/collections/pdf_collection.py +245 -100
- natural_pdf/core/element_manager.py +30 -14
- natural_pdf/core/highlighting_service.py +13 -22
- natural_pdf/core/page.py +423 -101
- natural_pdf/core/pdf.py +694 -195
- natural_pdf/elements/base.py +134 -40
- natural_pdf/elements/collections.py +610 -134
- natural_pdf/elements/region.py +659 -90
- natural_pdf/elements/text.py +1 -1
- natural_pdf/export/mixin.py +137 -0
- natural_pdf/exporters/base.py +3 -3
- natural_pdf/exporters/paddleocr.py +4 -3
- natural_pdf/extraction/manager.py +50 -49
- natural_pdf/extraction/mixin.py +90 -57
- natural_pdf/extraction/result.py +9 -23
- natural_pdf/ocr/__init__.py +5 -5
- natural_pdf/ocr/engine_doctr.py +346 -0
- natural_pdf/ocr/ocr_factory.py +24 -4
- natural_pdf/ocr/ocr_manager.py +61 -25
- natural_pdf/ocr/ocr_options.py +70 -10
- natural_pdf/ocr/utils.py +6 -4
- natural_pdf/search/__init__.py +20 -34
- natural_pdf/search/haystack_search_service.py +309 -265
- natural_pdf/search/haystack_utils.py +99 -75
- natural_pdf/search/search_service_protocol.py +11 -12
- natural_pdf/selectors/parser.py +219 -143
- natural_pdf/utils/debug.py +3 -3
- natural_pdf/utils/identifiers.py +1 -1
- natural_pdf/utils/locks.py +1 -1
- natural_pdf/utils/packaging.py +8 -6
- natural_pdf/utils/text_extraction.py +24 -16
- natural_pdf/utils/tqdm_utils.py +18 -10
- natural_pdf/utils/visualization.py +18 -0
- natural_pdf/widgets/viewer.py +4 -25
- {natural_pdf-0.1.8.dist-info → natural_pdf-0.1.10.dist-info}/METADATA +12 -3
- natural_pdf-0.1.10.dist-info/RECORD +80 -0
- {natural_pdf-0.1.8.dist-info → natural_pdf-0.1.10.dist-info}/WHEEL +1 -1
- {natural_pdf-0.1.8.dist-info → natural_pdf-0.1.10.dist-info}/top_level.txt +0 -2
- docs/api/index.md +0 -386
- docs/assets/favicon.png +0 -3
- docs/assets/favicon.svg +0 -3
- docs/assets/javascripts/custom.js +0 -17
- docs/assets/logo.svg +0 -3
- docs/assets/sample-screen.png +0 -0
- docs/assets/social-preview.png +0 -17
- docs/assets/social-preview.svg +0 -17
- docs/assets/stylesheets/custom.css +0 -65
- docs/categorizing-documents/index.md +0 -168
- docs/data-extraction/index.md +0 -87
- docs/document-qa/index.ipynb +0 -435
- docs/document-qa/index.md +0 -79
- docs/element-selection/index.ipynb +0 -969
- docs/element-selection/index.md +0 -249
- docs/finetuning/index.md +0 -176
- docs/index.md +0 -189
- docs/installation/index.md +0 -69
- docs/interactive-widget/index.ipynb +0 -962
- docs/interactive-widget/index.md +0 -12
- docs/layout-analysis/index.ipynb +0 -818
- docs/layout-analysis/index.md +0 -185
- docs/ocr/index.md +0 -256
- docs/pdf-navigation/index.ipynb +0 -314
- docs/pdf-navigation/index.md +0 -97
- docs/regions/index.ipynb +0 -816
- docs/regions/index.md +0 -294
- docs/tables/index.ipynb +0 -658
- docs/tables/index.md +0 -144
- docs/text-analysis/index.ipynb +0 -370
- docs/text-analysis/index.md +0 -105
- docs/text-extraction/index.ipynb +0 -1478
- docs/text-extraction/index.md +0 -292
- docs/tutorials/01-loading-and-extraction.ipynb +0 -1873
- docs/tutorials/01-loading-and-extraction.md +0 -95
- docs/tutorials/02-finding-elements.ipynb +0 -417
- docs/tutorials/02-finding-elements.md +0 -149
- docs/tutorials/03-extracting-blocks.ipynb +0 -152
- docs/tutorials/03-extracting-blocks.md +0 -48
- docs/tutorials/04-table-extraction.ipynb +0 -119
- docs/tutorials/04-table-extraction.md +0 -50
- docs/tutorials/05-excluding-content.ipynb +0 -275
- docs/tutorials/05-excluding-content.md +0 -109
- docs/tutorials/06-document-qa.ipynb +0 -337
- docs/tutorials/06-document-qa.md +0 -91
- docs/tutorials/07-layout-analysis.ipynb +0 -293
- docs/tutorials/07-layout-analysis.md +0 -66
- docs/tutorials/07-working-with-regions.ipynb +0 -414
- docs/tutorials/07-working-with-regions.md +0 -151
- docs/tutorials/08-spatial-navigation.ipynb +0 -513
- docs/tutorials/08-spatial-navigation.md +0 -190
- docs/tutorials/09-section-extraction.ipynb +0 -2439
- docs/tutorials/09-section-extraction.md +0 -256
- docs/tutorials/10-form-field-extraction.ipynb +0 -517
- docs/tutorials/10-form-field-extraction.md +0 -201
- docs/tutorials/11-enhanced-table-processing.ipynb +0 -59
- docs/tutorials/11-enhanced-table-processing.md +0 -9
- docs/tutorials/12-ocr-integration.ipynb +0 -3712
- docs/tutorials/12-ocr-integration.md +0 -137
- docs/tutorials/13-semantic-search.ipynb +0 -1718
- docs/tutorials/13-semantic-search.md +0 -77
- docs/visual-debugging/index.ipynb +0 -2970
- docs/visual-debugging/index.md +0 -157
- docs/visual-debugging/region.png +0 -0
- natural_pdf/templates/finetune/fine_tune_paddleocr.md +0 -420
- natural_pdf/templates/spa/css/style.css +0 -334
- natural_pdf/templates/spa/index.html +0 -31
- natural_pdf/templates/spa/js/app.js +0 -472
- natural_pdf/templates/spa/words.txt +0 -235976
- natural_pdf/widgets/frontend/viewer.js +0 -88
- natural_pdf-0.1.8.dist-info/RECORD +0 -156
- notebooks/Examples.ipynb +0 -1293
- pdfs/.gitkeep +0 -0
- pdfs/01-practice.pdf +0 -543
- pdfs/0500000US42001.pdf +0 -0
- pdfs/0500000US42007.pdf +0 -0
- pdfs/2014 Statistics.pdf +0 -0
- pdfs/2019 Statistics.pdf +0 -0
- pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
- pdfs/needs-ocr.pdf +0 -0
- {natural_pdf-0.1.8.dist-info → natural_pdf-0.1.10.dist-info}/licenses/LICENSE +0 -0
@@ -1,88 +0,0 @@
|
|
1
|
-
// natural_pdf/widgets/frontend/viewer.js
|
2
|
-
// Minimal version for debugging module loading
|
3
|
-
|
4
|
-
(function() {
|
5
|
-
// Use a flag to prevent multiple definitions if this script runs multiple times
|
6
|
-
if (window.interactiveViewerWidgetDefined) {
|
7
|
-
console.log("[DEBUG] viewer_widget already defined. Skipping re-definition.");
|
8
|
-
// If it was already defined, maybe trigger a manual load if require is available?
|
9
|
-
// This is tricky because the initial load might have failed partially.
|
10
|
-
if (typeof require !== 'undefined') {
|
11
|
-
console.log("[DEBUG] Attempting require(['viewer_widget'])...");
|
12
|
-
try {
|
13
|
-
require(['viewer_widget'], function(module) {
|
14
|
-
console.log("[DEBUG] Manual require succeeded:", module);
|
15
|
-
}, function(err) {
|
16
|
-
console.error("[DEBUG] Manual require failed:", err);
|
17
|
-
});
|
18
|
-
} catch (e) {
|
19
|
-
console.error("[DEBUG] Error during manual require:", e);
|
20
|
-
}
|
21
|
-
}
|
22
|
-
return;
|
23
|
-
}
|
24
|
-
window.interactiveViewerWidgetDefined = true;
|
25
|
-
console.log("[DEBUG] Defining viewer_widget module for the first time...");
|
26
|
-
|
27
|
-
// Check for requirejs *after* setting the flag, before defining
|
28
|
-
if (typeof requirejs === 'undefined') {
|
29
|
-
console.error('[DEBUG] requirejs is still not defined. Widget frontend cannot load.');
|
30
|
-
// Maybe display an error in the widget area itself?
|
31
|
-
// This suggests a fundamental issue with the Jupyter environment setup.
|
32
|
-
return;
|
33
|
-
}
|
34
|
-
if (typeof define !== 'function' || !define.amd) {
|
35
|
-
console.error('[DEBUG] define is not a function or define.amd is missing. Cannot define module.');
|
36
|
-
return;
|
37
|
-
}
|
38
|
-
|
39
|
-
// Clear any previous potentially failed definition
|
40
|
-
require.undef('viewer_widget');
|
41
|
-
|
42
|
-
// Define the module
|
43
|
-
define('viewer_widget', ['@jupyter-widgets/base'], function(widgets) {
|
44
|
-
console.log("[DEBUG] viewer_widget define callback executed.");
|
45
|
-
console.log("[DEBUG] @jupyter-widgets/base loaded:", widgets);
|
46
|
-
|
47
|
-
// Define a very simple view class
|
48
|
-
class InteractiveViewerView extends widgets.DOMWidgetView {
|
49
|
-
render() {
|
50
|
-
console.log("[DEBUG] InteractiveViewerView: render() called.");
|
51
|
-
this.el.textContent = 'Minimal Widget Loaded!'; // Simple text content
|
52
|
-
this.el.style.border = '2px solid green';
|
53
|
-
this.el.style.padding = '10px';
|
54
|
-
|
55
|
-
// Log received data
|
56
|
-
this.model.on('change:image_uri', () => console.log("[DEBUG] image_uri changed:", this.model.get('image_uri') ? 'Present' : 'Empty'), this);
|
57
|
-
this.model.on('change:page_dimensions', () => console.log("[DEBUG] page_dimensions changed:", this.model.get('page_dimensions')), this);
|
58
|
-
this.model.on('change:elements', () => console.log("[DEBUG] elements changed:", this.model.get('elements').length), this);
|
59
|
-
|
60
|
-
// Log initial data
|
61
|
-
console.log("[DEBUG] Initial image_uri:", this.model.get('image_uri') ? 'Present' : 'Empty');
|
62
|
-
console.log("[DEBUG] Initial page_dimensions:", this.model.get('page_dimensions'));
|
63
|
-
console.log("[DEBUG] Initial elements count:", this.model.get('elements').length);
|
64
|
-
}
|
65
|
-
|
66
|
-
remove() {
|
67
|
-
console.log("[DEBUG] InteractiveViewerView: remove() called.");
|
68
|
-
super.remove();
|
69
|
-
}
|
70
|
-
}
|
71
|
-
|
72
|
-
console.log("[DEBUG] viewer_widget module definition returning view.");
|
73
|
-
// Return the view class
|
74
|
-
return {
|
75
|
-
InteractiveViewerView: InteractiveViewerView
|
76
|
-
};
|
77
|
-
}, function(err) {
|
78
|
-
// Error callback for the define function
|
79
|
-
console.error("[DEBUG] Error loading module dependencies:", err);
|
80
|
-
const failedId = err.requireModules && err.requireModules[0];
|
81
|
-
if (failedId === 'react' || failedId === 'react-dom' || failedId === 'htm') {
|
82
|
-
console.error(`[DEBUG] Failed to load CDN dependency: ${failedId}. Check network connection and CDN availability.`);
|
83
|
-
} else if (failedId === '@jupyter-widgets/base') {
|
84
|
-
console.error("[DEBUG] Failed to load @jupyter-widgets/base. Ensure ipywidgets frontend is installed and enabled.");
|
85
|
-
}
|
86
|
-
});
|
87
|
-
|
88
|
-
})();
|
@@ -1,156 +0,0 @@
|
|
1
|
-
docs/index.md,sha256=FG4MYQs-gUR16NQ4XF0AVoQeLuykLeY8XxNwW3h-qUM,5572
|
2
|
-
docs/api/index.md,sha256=4bn8nYklWJuNDrnY-Kt7sf7IejeAEDhcnqYmjH9GJTA,22405
|
3
|
-
docs/assets/favicon.png,sha256=nxca8jM2Y4GxZKzkmagUHO1GpUREK-GRA5LEFue9OOU,284
|
4
|
-
docs/assets/favicon.svg,sha256=nxca8jM2Y4GxZKzkmagUHO1GpUREK-GRA5LEFue9OOU,284
|
5
|
-
docs/assets/logo.svg,sha256=DdSmjHISSLt20mhf7YDXztigE6w_Reja_ATUYNAPL0M,286
|
6
|
-
docs/assets/sample-screen.png,sha256=qfsrThVoPOksn1ACVbLGe_gdPQmY1EsespXPs1Ocm2Y,182330
|
7
|
-
docs/assets/social-preview.png,sha256=AvyzzM8dC0j5SPFF63bvQrxU4GE1f9j-GUNUv0oA9ts,1085
|
8
|
-
docs/assets/social-preview.svg,sha256=AvyzzM8dC0j5SPFF63bvQrxU4GE1f9j-GUNUv0oA9ts,1085
|
9
|
-
docs/assets/javascripts/custom.js,sha256=0NVHGprwiLPFYdYunJcHjOphzk_EhBSNuOUz5Uzdv_k,594
|
10
|
-
docs/assets/stylesheets/custom.css,sha256=PbTp3k77gzUBUQQ01pDXzpNwo4wUv3aJD-SMBQvQItY,1156
|
11
|
-
docs/categorizing-documents/index.md,sha256=tgKfv3DidZysrFhaOEM-FiIVDAzNPPnK02sKaE5pE2I,8196
|
12
|
-
docs/data-extraction/index.md,sha256=LwQ2MJVI5u5ELI51Iq0WUdDo5sl_s18GWG_cBABI8fQ,3430
|
13
|
-
docs/document-qa/index.ipynb,sha256=MXJoFhi8TUKK6ZnRFiUBglLGpMbzwdb7LJYfzw8Gp48,528713
|
14
|
-
docs/document-qa/index.md,sha256=mwuO4tothg0OzBXewnj73QEJu46Udq7f1pQBYrKOHwM,2131
|
15
|
-
docs/element-selection/index.ipynb,sha256=WuKd3bTTOnzBDfbuzkxmJxO6EzM9RAkFXoF0U3-8qRA,1223398
|
16
|
-
docs/element-selection/index.md,sha256=ZUkOD6VVK11K6WQ86FPnTeeco27PrFWtkObKw8j6Fok,7867
|
17
|
-
docs/finetuning/index.md,sha256=Ur3zqSaR0X8PvBCSyI7cFiDv5qZ6Jtv4omBKXCKAzEk,9200
|
18
|
-
docs/installation/index.md,sha256=nd4RZrQFR8_vv7Xm3xAzp7z-CQQr9ffAcGa7yuEYn2U,1594
|
19
|
-
docs/interactive-widget/index.ipynb,sha256=zY1rz5N34OUW-OtgcbI6iiOjlIJqXjVcx9OoNWMjuyU,293111
|
20
|
-
docs/interactive-widget/index.md,sha256=tZbq0uYI7Zwo9mLbhXpqeBriuAjazkIyEJeP-jasJ-Q,259
|
21
|
-
docs/layout-analysis/index.ipynb,sha256=dkS_-cu-KGir5G2LGRcxBThKnW0dfA5nPPnwpoYGFtU,1869093
|
22
|
-
docs/layout-analysis/index.md,sha256=ZnH5yd7B_eOLgGxW_4rNlzQs4Tn3Xx1cK3jX43CSpSM,5390
|
23
|
-
docs/ocr/index.md,sha256=BR8a3_X6zng5yAo8O8isOBhb2Gm9hM9FIasc58aYF78,11137
|
24
|
-
docs/pdf-navigation/index.ipynb,sha256=h6yew0HePXK1_c5FmETqzjBQceUBT0MU-vnXx_y91mo,8018
|
25
|
-
docs/pdf-navigation/index.md,sha256=P3b3tsmOcmRtnfRxpsMeTgwm7vApnH_4le_QIwJd51M,2391
|
26
|
-
docs/regions/index.ipynb,sha256=5A-N5A4v4lcXNptOAeI4i7i9Gx66To-Yus8B816dHBk,1303347
|
27
|
-
docs/regions/index.md,sha256=e4aS_vV2FUFHPc5-Up60Ip8PYBIwT9qkjZcNwaS3JbY,8197
|
28
|
-
docs/tables/index.ipynb,sha256=61I9GwJlOM02Mx5aUtzJpRMh0OkpVlqMuEsO1J2s4go,763784
|
29
|
-
docs/tables/index.md,sha256=MVQpkhcWiFJwhMjfPouRVV0nZIUG-PNwFdspc-E8Xow,4428
|
30
|
-
docs/text-analysis/index.ipynb,sha256=iaup8pcQXGp0ZK3IWi-HHssQLdIzWYGYfvZK5i8yjjg,538024
|
31
|
-
docs/text-analysis/index.md,sha256=02pfZemOgV37izV7H-XzKmHu7AedDKLidQ-sKhYaMVw,3527
|
32
|
-
docs/text-extraction/index.ipynb,sha256=809y9ZamXT3bc3GhwwFyoDnlyEpO-kUZ3tIsZZWyrj8,2537087
|
33
|
-
docs/text-extraction/index.md,sha256=b1KfQpvIEelc8cPbFETUnK92az7iB4b7-LqK2DRH8vw,6985
|
34
|
-
docs/tutorials/01-loading-and-extraction.ipynb,sha256=2vGLM1_2_Xcpn32HvMLXj_Ro8w4HPofSZNpxZ1qPtL8,520140
|
35
|
-
docs/tutorials/01-loading-and-extraction.md,sha256=g40J8GhKz-ikM2URj5MqIatKKj4l5kTFozHeVjxDJQA,2191
|
36
|
-
docs/tutorials/02-finding-elements.ipynb,sha256=yVW3B578mKXkFUWJQnBaDB0SlnNodROjemMbdx-LWBw,524506
|
37
|
-
docs/tutorials/02-finding-elements.md,sha256=qOkjcWUzem05of54aKzKvy-MMzRX_S4CyZisVV-73QM,4162
|
38
|
-
docs/tutorials/03-extracting-blocks.ipynb,sha256=qifBv5bsKcZIQVQAHtl84GqD6Wy-IZiUMkSXURCu3ug,263329
|
39
|
-
docs/tutorials/03-extracting-blocks.md,sha256=_kqvhk6rSL7cGp2MSwTJk8LYlJGbK_r_umnCSBdR8XU,1665
|
40
|
-
docs/tutorials/04-table-extraction.ipynb,sha256=Jj2OzN32I5z1_gfMVgdr2GGyEgbWTgI7harwMWfHxYc,4089
|
41
|
-
docs/tutorials/04-table-extraction.md,sha256=4q4v17VX8K-ZBtWYy0nbWPccyqB_ybd5Vl_IROmxz6Q,2130
|
42
|
-
docs/tutorials/05-excluding-content.ipynb,sha256=EaZwfDJK3BUghY1iwQ4qR8Z9nXf9e8QUeHxvJmZ3xsw,336933
|
43
|
-
docs/tutorials/05-excluding-content.md,sha256=U52SPlc5knqxiyhRokmxrj06T54r2ENyTfP7BMGykhY,3907
|
44
|
-
docs/tutorials/06-document-qa.ipynb,sha256=sGesxP26CMSD2GD-47dXq7EnqK3tlEDzM-uu7sZVR2E,10421
|
45
|
-
docs/tutorials/06-document-qa.md,sha256=PzPPgw0Rkkfe6sfz3XyKD9S9JbQ40qf4bDzCBvwH1P0,3026
|
46
|
-
docs/tutorials/07-layout-analysis.ipynb,sha256=DgyocqPRt9Rxsz-Sjgi153MvvnoDF3Vpsyhq27N72sE,571321
|
47
|
-
docs/tutorials/07-layout-analysis.md,sha256=NAYVzJTecDnXjo_isbPCSUBSn3c-xM1tELct1Zn5GmI,2533
|
48
|
-
docs/tutorials/07-working-with-regions.ipynb,sha256=JMUnjQ_tCBqs4dWIyZ2jNHQCnJkwAzTJuxQVRGBqLqI,67945
|
49
|
-
docs/tutorials/07-working-with-regions.md,sha256=oanbTFSQ-topAVd9kjfkaPiMjHcx6Y8cqyxVbmxLhgs,4365
|
50
|
-
docs/tutorials/08-spatial-navigation.ipynb,sha256=Q0N-az8ZiaMmS42HXMnpDYp97Z_9YPXfM-azC9Sf_f8,186624
|
51
|
-
docs/tutorials/08-spatial-navigation.md,sha256=IMbOYBjayXKE7pHfBjApTxOoKRD8WYj7opf8fsJCtzA,4855
|
52
|
-
docs/tutorials/09-section-extraction.ipynb,sha256=CPBXw28Y7WjWE3HY5SJlUnGlOFQQQ0ZUB65c_uVissA,1101081
|
53
|
-
docs/tutorials/09-section-extraction.md,sha256=Jy_be8ftAl_VPBWl5nEv7_5sKSZPx22DLUcBVHMD3Nc,7832
|
54
|
-
docs/tutorials/10-form-field-extraction.ipynb,sha256=S0S5cdnrioweeKVjdRQnZptUEG-b0VvgrROkOygjAzk,268148
|
55
|
-
docs/tutorials/10-form-field-extraction.md,sha256=t9tPlW36vJEhDrKIsHGg_f3P_MK62DT4-ZK1thKFs4Y,5494
|
56
|
-
docs/tutorials/11-enhanced-table-processing.ipynb,sha256=2i8gQRwkLDH14Yie56-3K5YIhdaR83XbL7m-8pQ5cJU,1394
|
57
|
-
docs/tutorials/11-enhanced-table-processing.md,sha256=2HK-r1UwU7FLn7zWr_pMG7iLk-i0L4U4-t6ubOEeduc,282
|
58
|
-
docs/tutorials/12-ocr-integration.ipynb,sha256=DB1pWJG1vW4aNVdQ2g5w42a71TFThmzObaVQs8h63U0,194084
|
59
|
-
docs/tutorials/12-ocr-integration.md,sha256=-IW4wqLb10eOIWC00NHTGXwtD6jDv7Tp7d-UCOk9SuE,5057
|
60
|
-
docs/tutorials/13-semantic-search.ipynb,sha256=BwFepMsOuHrWTFqczvxikPgTh5o97sYX4uleylnOBmc,54126
|
61
|
-
docs/tutorials/13-semantic-search.md,sha256=nsNjv0ipYUC3YPSqT5d6dga9ZjObEc04Mc8c0-gsRnU,2914
|
62
|
-
docs/visual-debugging/index.ipynb,sha256=MJ92u3Q9sfRCyDAQM4KWmCrs4QhKwIagbn6ytPF83L4,2175800
|
63
|
-
docs/visual-debugging/index.md,sha256=ueGD2kNFhEAgIHt7qxCfrLRLjHcR7NTD3AU9okBhX9k,4176
|
64
|
-
docs/visual-debugging/region.png,sha256=ULAJs3ZTxMjpD9F4w1DKaZXmhxga3KRq3NrUsXgw28s,67835
|
65
|
-
natural_pdf/__init__.py,sha256=aCnIBTYZlUCL1j78sScPX8kXF88JnuQSHsErboTcjnM,2727
|
66
|
-
natural_pdf/analyzers/__init__.py,sha256=dIXjsMqoxKmd9OOnSBzn12wvdIz7D7YNQRAnXslpJSM,142
|
67
|
-
natural_pdf/analyzers/text_options.py,sha256=nE2E1pp4psDPpxmtarvNtEQsgozPkyFRjv0TVP2HTyU,2865
|
68
|
-
natural_pdf/analyzers/text_structure.py,sha256=9h8hKRz0JWnr13xQr3b4FFr_-hDIjue07WvG7LmT8nc,12827
|
69
|
-
natural_pdf/analyzers/utils.py,sha256=Lgub1kYSTOnNxeLO1klStHLwH-GIuT4vpdqyVRF-Mcg,2084
|
70
|
-
natural_pdf/analyzers/layout/__init__.py,sha256=oq1uJ5UkGGMbBKGirV1aRKK3hxAUyjTLywYkPCQH1f0,33
|
71
|
-
natural_pdf/analyzers/layout/base.py,sha256=9dCR758mAuz7ExlHJ-gwnPnETaM4GZV3W1IRei_t13s,6815
|
72
|
-
natural_pdf/analyzers/layout/docling.py,sha256=4BJYyNVR6VegZGxyisvNIBBRvVk6YKPyDVs7ZdVfzEU,12676
|
73
|
-
natural_pdf/analyzers/layout/gemini.py,sha256=CzJPWyyEghuCNpu2CMb6OA6FtBGdGhXspHjsjy6I4JE,11195
|
74
|
-
natural_pdf/analyzers/layout/layout_analyzer.py,sha256=6aed1qz5jpndOiakXCBRZAcnyG_waeXi3WPuP5fRvh4,14046
|
75
|
-
natural_pdf/analyzers/layout/layout_manager.py,sha256=Vh8EKiszKqjELofxQ1eiVLKVjibyjBsZpLFzTf0_21E,11179
|
76
|
-
natural_pdf/analyzers/layout/layout_options.py,sha256=s7xr4brE3OutE6aYNAi2PniRy1p2w8a342C2xGpvX2s,3777
|
77
|
-
natural_pdf/analyzers/layout/paddle.py,sha256=gTI9ZqNd5-t4H5IByGfL32WgcE6JrdchW6jRiGI6ulM,13375
|
78
|
-
natural_pdf/analyzers/layout/surya.py,sha256=vhji6ynHPMyQLHuYRPQcplNi7m_lG4P4NYtWv6MzcME,13556
|
79
|
-
natural_pdf/analyzers/layout/tatr.py,sha256=-GJhMy4d0yx6egkO9-ULAIdQkkQRyAKExoIta-b256U,12971
|
80
|
-
natural_pdf/analyzers/layout/yolo.py,sha256=ANo2U4EZgeN2eYKM1bZIuysiuJLgwl4JeQchrRxOKwA,8388
|
81
|
-
natural_pdf/classification/manager.py,sha256=pLcEDe1a5QARJCMimE5Ul_HKZD4jX-eREUCeUuniA0U,16445
|
82
|
-
natural_pdf/classification/mixin.py,sha256=aySe0bEjkaI9qYDmSkQe536w0Xrxcg4j6k3JGPvj-cY,6737
|
83
|
-
natural_pdf/classification/results.py,sha256=Hn-3xDSThR8x7XpoTlQLWpX6JE1VHVe2QpOeWNY2Ycw,2949
|
84
|
-
natural_pdf/collections/mixins.py,sha256=BXk4o_PRrczSXjR7vorIEe4WyEKyms4_qYnY8ZAZd-A,2737
|
85
|
-
natural_pdf/collections/pdf_collection.py,sha256=F_4Z-nrL9wFQ-mt4T4cJ2ERVUnkh2kyQdmOV8ASBgoM,27281
|
86
|
-
natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
|
87
|
-
natural_pdf/core/element_manager.py,sha256=KZ9yNtpFwuImDWmFUXgISAoWQdSib93E4t3ILUZzIic,24805
|
88
|
-
natural_pdf/core/highlighting_service.py,sha256=CTVd7y-fpIreFSe70cTpMu1Pwl6HKMtTHp0bh2U7VXk,32609
|
89
|
-
natural_pdf/core/page.py,sha256=4iykmXdVwmSQOpGukTxfJYU-5XEgSafNbKsnIedVaGA,94051
|
90
|
-
natural_pdf/core/pdf.py,sha256=yPAaOv5vNKZlC9oVk5sKsFxb4LdoRygz_Qkp2EaDtOY,43074
|
91
|
-
natural_pdf/elements/__init__.py,sha256=S8XeiNWJ1WcgnyYKdYV1yxQlAxCCO3FfITT8MQwNbyk,41
|
92
|
-
natural_pdf/elements/base.py,sha256=UtoSD-c_s0yiLpWZrIIJjeJ9MgGz_4R0UHYcsFWH6bc,35157
|
93
|
-
natural_pdf/elements/collections.py,sha256=CCQVgglxWLfhuy4FZvVHXdmgiZxU27Ay7Myt8ttQYWg,79467
|
94
|
-
natural_pdf/elements/line.py,sha256=7cow3xMUKhAj7zoQz7OaB1eIH2_a8B__LB7iGJ4Mb0o,4612
|
95
|
-
natural_pdf/elements/rect.py,sha256=kiVa3e377ZnqIOXc89d9ZSY4EcmDxtccdtUw-HOQzpw,3796
|
96
|
-
natural_pdf/elements/region.py,sha256=f7ArCPizkosIei9ePixHYqedK3K6LBVJotwKZ-y33a0,74058
|
97
|
-
natural_pdf/elements/text.py,sha256=ZpPluwZtAVfOkoeM4Fm2PDsN87BBZduURZaFWns03RM,11158
|
98
|
-
natural_pdf/exporters/__init__.py,sha256=7MnvRLLQdwtg-ULu-8uK8C84GsKiJamyhRw_GgWhw7k,151
|
99
|
-
natural_pdf/exporters/base.py,sha256=s-NpHoH81x80GQxs0oqjdhPGrzbUa8npjnK8apKOsHQ,2115
|
100
|
-
natural_pdf/exporters/paddleocr.py,sha256=vyVetJ6RgEY46qS5Yl5mKl4cSJadwOxLWGGsdiDjico,16248
|
101
|
-
natural_pdf/exporters/searchable_pdf.py,sha256=qsaPsnbOOaZHA_aplfZbwQnBoK9KghWm-wzbyRRomeY,16859
|
102
|
-
natural_pdf/extraction/manager.py,sha256=YH5dyUorMItGxuaZ-DhuJD5Sh_Ozjj0fa-WBMcQw1E0,4903
|
103
|
-
natural_pdf/extraction/mixin.py,sha256=6CWYyutGcKCxFVYun8yXC4H1IZWLMXaeYZ-cWJRx5FE,11430
|
104
|
-
natural_pdf/extraction/result.py,sha256=ihY1g_C2hsMACYqU7bcvAKRijuh-FHVtpnn0uoP--pk,1047
|
105
|
-
natural_pdf/ocr/__init__.py,sha256=jKaDbo13CdCDcas1WiBmg5gjBvVeG-Z9uaeYxyzvaNY,2464
|
106
|
-
natural_pdf/ocr/engine.py,sha256=ZBC1tZNM5EDbGDJJmZI9mNHr4nCMLEZvUFhiJq8GdF4,8741
|
107
|
-
natural_pdf/ocr/engine_easyocr.py,sha256=9TbxJjmhWFrzM8mcNnZjoRtIDr6gwpuwKm4-Zfub2-8,9281
|
108
|
-
natural_pdf/ocr/engine_paddle.py,sha256=2nIrvLBBAiZG1BxVo3eFVJulA6YGoOTXw_RN98p_BUk,6184
|
109
|
-
natural_pdf/ocr/engine_surya.py,sha256=iySjG-Dahgh0cLICfbMtOcwUpRFcZjo-5Ed5Zwz-o5Y,4805
|
110
|
-
natural_pdf/ocr/ocr_factory.py,sha256=IFccj0BB75YGV4hjcy4ECtGQX_JQzdptpvDFfeGxxgI,4391
|
111
|
-
natural_pdf/ocr/ocr_manager.py,sha256=ivk4Aqr5gsDJWiCxP1-FLkhuvfJiQtilwbPtgIPm--4,13320
|
112
|
-
natural_pdf/ocr/ocr_options.py,sha256=BcPVwJGYE3vMug7wsVh_ARUJlm_4emz9ynOAwYgwHBk,4257
|
113
|
-
natural_pdf/ocr/utils.py,sha256=4b_A47hfynfV00iR8I9OWmXCzDzRvSdEkQhZLcSV4kQ,4394
|
114
|
-
natural_pdf/qa/__init__.py,sha256=Pjo62JTnUNEjGNsC437mvsS5KQ5m7X_BibGvavR9AW0,108
|
115
|
-
natural_pdf/qa/document_qa.py,sha256=Jw4yyq3Vifn57D0ANmOfUlZeG8CJjBkItZBV-8ZAmos,15111
|
116
|
-
natural_pdf/search/__init__.py,sha256=EB_HRwlktJn5WGPVtSaRbOQNjLAZTxujeYf_eN-zd2U,4191
|
117
|
-
natural_pdf/search/haystack_search_service.py,sha256=6RjTFWbTo3gaO-90IF6PEuo_9WRwOdj232eWn3OT0BQ,29270
|
118
|
-
natural_pdf/search/haystack_utils.py,sha256=UI4eu3SVieGR_QnBtLhP8Fjtt2AJgeLgxrpa_dBmD6k,19289
|
119
|
-
natural_pdf/search/search_options.py,sha256=sq_e8_jSROicD94b_xtDtLnjEr_Zsy4icjzPkK0a8QA,3566
|
120
|
-
natural_pdf/search/search_service_protocol.py,sha256=ybNcF_NoLZuIx0rb4XB1dsDl3o_LAaWR1fVVKld2TxI,6818
|
121
|
-
natural_pdf/search/searchable_mixin.py,sha256=M2a6FaFVM0vcfh7FgjDH6BLhS-7ggeVpcfft4OOBDxY,26390
|
122
|
-
natural_pdf/selectors/__init__.py,sha256=oZGeqSv53EqmIZOhcnawuaGGlRg1h79vArXuZCWKm4A,123
|
123
|
-
natural_pdf/selectors/parser.py,sha256=AKXGv4MaZDiaWT_jSfn_vU-qVlECB8b-IxnyocXtaaE,22671
|
124
|
-
natural_pdf/templates/__init__.py,sha256=jYBxzfi73vew0f6yhIh1MlRxw4F_TVN2hKQR0YXOFe0,20
|
125
|
-
natural_pdf/templates/finetune/fine_tune_paddleocr.md,sha256=H6Wmu3Nvi2qKK-rPwr8KUZfILzXz8VmWyCWYOTe6QCI,14764
|
126
|
-
natural_pdf/templates/spa/index.html,sha256=6hLTp07OeV5Q4jUMp5Sgl-dwfBs3oPzBxqphG4kEs24,787
|
127
|
-
natural_pdf/templates/spa/words.txt,sha256=vkGtl5Y7-Nq-3Vhx1daRWWF1Jp1UCVaw-ZZaiFwrurk,2493885
|
128
|
-
natural_pdf/templates/spa/css/style.css,sha256=Qdl0U3L5HMyhBDNzyRPklfb3OxW6rMxCfQbzO8i8IW4,7643
|
129
|
-
natural_pdf/templates/spa/js/app.js,sha256=Efb7NmcTN9RLdLwKpDcU6CG5Ix0laHtzRHmfUlDMJXw,19679
|
130
|
-
natural_pdf/utils/__init__.py,sha256=s3M8FggaK1P3EBYn6R_-HgSDjNc9C73gyKe1hihtNWg,43
|
131
|
-
natural_pdf/utils/debug.py,sha256=lk_6qzxan8NagjEtJEZpZ2MS30SO8ce6iznBxmA0xgk,995
|
132
|
-
natural_pdf/utils/highlighting.py,sha256=EIY6ihVGtUTS_DjWyxpnr_UXpcR4btC1KhSGQ9VUfKg,698
|
133
|
-
natural_pdf/utils/identifiers.py,sha256=n61viCQiMlf5-E_jsPLe-FkPBdKkMKv-gfs5tGqlKiw,1117
|
134
|
-
natural_pdf/utils/locks.py,sha256=E_Fb6GnRNq-tF5aE7jnllkpidsNr8LXPhSaqgr56Ks4,215
|
135
|
-
natural_pdf/utils/packaging.py,sha256=HSgpubpHICU75L4ZAZPU8iOjium055XWnklV9_YqoCA,21579
|
136
|
-
natural_pdf/utils/reading_order.py,sha256=s3DsYq_3g_1YA07qhd4BGEjeIRTeyGtnwc_hNtSzwBY,7290
|
137
|
-
natural_pdf/utils/text_extraction.py,sha256=qZfOuO57XeKg7p-Q7yzTBMTrpAvDRslYXjDSjiJLStI,9545
|
138
|
-
natural_pdf/utils/tqdm_utils.py,sha256=bKWvsoAOl0lPOPLJC2hkTtkdxBf5f9aVtcA3DmUE19M,1570
|
139
|
-
natural_pdf/utils/visualization.py,sha256=5GbhxtvZW-77ONVnICupg-s2D-OaxLZNqkKlOrQESK4,8593
|
140
|
-
natural_pdf/widgets/__init__.py,sha256=O2fSDo604wDAP6UwUkmBq3eT91RSqHwBpAOQXq92S8s,214
|
141
|
-
natural_pdf/widgets/viewer.py,sha256=Aiw6kuBc0WkhcZrPNKyLNzzWbmtmU6rvOmHV0IuXCBk,40862
|
142
|
-
natural_pdf/widgets/frontend/viewer.js,sha256=w8ywfz_IOAAv2nP_qaf2VBUkF1KhjT3zorhJxM1-CfU,4371
|
143
|
-
natural_pdf-0.1.8.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
|
144
|
-
notebooks/Examples.ipynb,sha256=l4YMtMEx_DWBzWIjl9CmBkWTo0g_nK8l_XWOyzYooQM,4275170
|
145
|
-
pdfs/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
146
|
-
pdfs/01-practice.pdf,sha256=dxWyJIa2cm7bALE3BWDJ2dg3inyFlo1n8ntVyy0hkTo,7906
|
147
|
-
pdfs/0500000US42001.pdf,sha256=VHn5gxlysgD6oJUCndeWXe_RwOrOraO1uIRt_fu8YNY,315883
|
148
|
-
pdfs/0500000US42007.pdf,sha256=pTfu_IVKvHpv9WUyN3QSiGevAOpPZmnr4QL1z-rYQ4E,1168633
|
149
|
-
pdfs/2014 Statistics.pdf,sha256=B-30OQVjqj_3718-G9cGUefNddnz-MosPdHAzfGfkcc,9559
|
150
|
-
pdfs/2019 Statistics.pdf,sha256=reuSJxvAlx9_P-pW7IPqzox0jFCxSPbK1i1-WFu-uGA,511439
|
151
|
-
pdfs/Atlanta_Public_Schools_GA_sample.pdf,sha256=PLBh_uWJQH0MnBaSm5ng5Ima63_m6Mi11CjdravB_S8,137689
|
152
|
-
pdfs/needs-ocr.pdf,sha256=vusKiLxSOlELUTetfZfaotNU54RtMj9PCzGfLc2cuNs,139305
|
153
|
-
natural_pdf-0.1.8.dist-info/METADATA,sha256=Qz_ePmFWt4poceUJnVcldvhJoIRWuo2lEIEoVp-mnwE,7030
|
154
|
-
natural_pdf-0.1.8.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
155
|
-
natural_pdf-0.1.8.dist-info/top_level.txt,sha256=7nDKUnpkN7B8cBI7DEpW5JM8S7OcOgHw3jXH-1iCX2o,32
|
156
|
-
natural_pdf-0.1.8.dist-info/RECORD,,
|