natural-pdf 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/analyzers/guides.py +159 -3
- natural_pdf/core/highlighting_service.py +8 -8
- natural_pdf/core/page.py +135 -4
- natural_pdf/core/page_collection.py +37 -0
- natural_pdf/core/page_groupby.py +229 -0
- natural_pdf/core/render_spec.py +18 -4
- natural_pdf/elements/base.py +54 -6
- natural_pdf/elements/element_collection.py +1 -0
- natural_pdf/elements/region.py +2 -2
- natural_pdf/elements/text.py +5 -0
- natural_pdf/extraction/manager.py +8 -14
- natural_pdf/extraction/mixin.py +35 -21
- natural_pdf/selectors/parser.py +2 -2
- natural_pdf/tables/result.py +37 -0
- {natural_pdf-0.2.0.dist-info → natural_pdf-0.2.2.dist-info}/METADATA +2 -2
- {natural_pdf-0.2.0.dist-info → natural_pdf-0.2.2.dist-info}/RECORD +22 -21
- {natural_pdf-0.2.0.dist-info → natural_pdf-0.2.2.dist-info}/top_level.txt +0 -2
- optimization/performance_analysis.py +1 -1
- tools/bad_pdf_eval/analyser.py +1 -1
- {natural_pdf-0.2.0.dist-info → natural_pdf-0.2.2.dist-info}/WHEEL +0 -0
- {natural_pdf-0.2.0.dist-info → natural_pdf-0.2.2.dist-info}/entry_points.txt +0 -0
- {natural_pdf-0.2.0.dist-info → natural_pdf-0.2.2.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: natural-pdf
|
3
|
-
Version: 0.2.0
|
3
|
+
Version: 0.2.2
|
4
4
|
Summary: A more intuitive interface for working with PDFs
|
5
5
|
Author-email: Jonathan Soma <jonathan.soma@gmail.com>
|
6
6
|
License-Expression: MIT
|
@@ -14,7 +14,7 @@ License-File: LICENSE
|
|
14
14
|
Requires-Dist: scikit-learn
|
15
15
|
Requires-Dist: markdown
|
16
16
|
Requires-Dist: pandas
|
17
|
-
Requires-Dist: pdfplumber
|
17
|
+
Requires-Dist: pdfplumber>=0.11.7
|
18
18
|
Requires-Dist: colormath2
|
19
19
|
Requires-Dist: pillow
|
20
20
|
Requires-Dist: colour
|
@@ -2,7 +2,7 @@ natural_pdf/__init__.py,sha256=N4pR0LbuPEnUYFZqbdVqc_FGKldgwPQc1wjJhYKTBBM,3417
|
|
2
2
|
natural_pdf/cli.py,sha256=SkPwhhMM-GhLsj3O1n1Agxz4KOxcZ08sj8hVQSFJB5c,4064
|
3
3
|
natural_pdf/text_mixin.py,sha256=eFCiHj6Okcw3aum4955BepcI2NPRalkf9UFFVTc_H30,4012
|
4
4
|
natural_pdf/analyzers/__init__.py,sha256=3XGoNq3OgiVkZP7tOdeP5XVUl7fDgyztdA8DlOcMLXg,1138
|
5
|
-
natural_pdf/analyzers/guides.py,sha256=
|
5
|
+
natural_pdf/analyzers/guides.py,sha256=9FUbxk4XBOyktXgq9q5-bB949JFrzT1kBPikg2ENoIw,150032
|
6
6
|
natural_pdf/analyzers/shape_detection_mixin.py,sha256=mgpyJ4jIulz9l9HCqThabJIsLSrXh9BB2AmLxUoHmw0,62584
|
7
7
|
natural_pdf/analyzers/text_options.py,sha256=qEkDaYWla0rIM_gszEOsu52q7C_dAfV81P2HLJZM2sw,3333
|
8
8
|
natural_pdf/analyzers/text_structure.py,sha256=3WWusi-BI0krUnJxB05DD6XmKj5qRNvQBqH7zOQGm1M,28451
|
@@ -26,25 +26,26 @@ natural_pdf/classification/results.py,sha256=5ha77CxK0GYwkBMJbvUBZkBjsL5GpOveIZD
|
|
26
26
|
natural_pdf/collections/mixins.py,sha256=u4KtnlUZZYQ74e0OXAniOv9RtuA6FhwBxsLMJLjdbpQ,5169
|
27
27
|
natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
|
28
28
|
natural_pdf/core/element_manager.py,sha256=DRZvntd99wjXy6KeDjCq5uRhjMftZop9QklOZqlUH8M,55349
|
29
|
-
natural_pdf/core/highlighting_service.py,sha256=
|
30
|
-
natural_pdf/core/page.py,sha256=
|
31
|
-
natural_pdf/core/page_collection.py,sha256=
|
29
|
+
natural_pdf/core/highlighting_service.py,sha256=mhHEomIlHQM1lVmEcUMJ4xtHsvCRb3rMroW1d1Gqs-M,67942
|
30
|
+
natural_pdf/core/page.py,sha256=M_V0IXahopIN45ENmvIm4m_WdnrjYECmXe7xhUQtjQI,142455
|
31
|
+
natural_pdf/core/page_collection.py,sha256=coVUsp4uLR2GImLbuGFpBIYcU952eJLfBQNgTmkOSzU,52486
|
32
|
+
natural_pdf/core/page_groupby.py,sha256=550ME6kd-h-2u75oUIIIqTYsmh8VvdQO1nXXioL8J6A,7378
|
32
33
|
natural_pdf/core/pdf.py,sha256=q54DyhXwAS_zAmsBd3PsCezu1wyQOYmGmB3iKfP8gAM,101884
|
33
34
|
natural_pdf/core/pdf_collection.py,sha256=8tM0qVWS1L5Hwv5cXuZ2X8znAYOjKmlERX62bksDlJU,30144
|
34
|
-
natural_pdf/core/render_spec.py,sha256=
|
35
|
+
natural_pdf/core/render_spec.py,sha256=SgT6bHR3yduZ_-JhFWRFmakUD74NPQJ8q1lY6iB3prQ,12916
|
35
36
|
natural_pdf/describe/__init__.py,sha256=kIV7ORmWWB1SAur7nK2aAwR-wHqSedhKfUsaUl4hG0A,586
|
36
37
|
natural_pdf/describe/base.py,sha256=Of9WVo9XuShXoeyJr0RN2CpLhF_CeiOjazl-or53RKU,18173
|
37
38
|
natural_pdf/describe/elements.py,sha256=JicXC9SJmmasqxalpCXA47-kVwv-6JnR3Xiu778aNHM,12634
|
38
39
|
natural_pdf/describe/mixin.py,sha256=rkX14aGrSz7Jvxx8Rbxv3eSfbO-_29DipwpstrV2pDQ,3109
|
39
40
|
natural_pdf/describe/summary.py,sha256=cfT4ZQkeatCDAOwWPwhtEVXisNgk6E57fAXAnoRysSU,7645
|
40
41
|
natural_pdf/elements/__init__.py,sha256=ICNikmLeIEuSYypz-KnkBn8xR1hR7rge4hsa1KLkyWY,42
|
41
|
-
natural_pdf/elements/base.py,sha256=
|
42
|
-
natural_pdf/elements/element_collection.py,sha256=
|
42
|
+
natural_pdf/elements/base.py,sha256=WUwYDzeeGNkr26lWKm8PqGlW9WQIPoteWYIpvlcxTrs,53939
|
43
|
+
natural_pdf/elements/element_collection.py,sha256=uQoZ2GFCnru0LCiv5zr6wIu2IWgM0j2m44qjsJPNPbk,101340
|
43
44
|
natural_pdf/elements/image.py,sha256=zu-P2Y8fRoEXf6IeZU0EYRWsgZ6I_a5vy1FA3VXTGkQ,1424
|
44
45
|
natural_pdf/elements/line.py,sha256=TFn7KXjPT_jUQyQyabU0F7XYU4dC-qadwodJMZF4DCU,3844
|
45
46
|
natural_pdf/elements/rect.py,sha256=0lNkVkPkvbRbrFED856RXoUcTcDkeeOIs5xldKGAQT8,3324
|
46
|
-
natural_pdf/elements/region.py,sha256=
|
47
|
-
natural_pdf/elements/text.py,sha256=
|
47
|
+
natural_pdf/elements/region.py,sha256=ClL2vxx2aVoAecaAlkUDZ2ygvUiP8oTa-xfIclm2Eg8,155286
|
48
|
+
natural_pdf/elements/text.py,sha256=829uSJv9E-8cC6T6iR_Va7Xtv54pJoyRN78fq4NN1d4,20687
|
48
49
|
natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
|
49
50
|
natural_pdf/exporters/__init__.py,sha256=QffoARekR6WzXEd05oxOytly4qPdBizuIF-SUkeFpig,643
|
50
51
|
natural_pdf/exporters/base.py,sha256=379sioW_hbkGb21sEVuJhbkkDO5MFsFtTUNO5TgG2YU,2101
|
@@ -56,8 +57,8 @@ natural_pdf/exporters/searchable_pdf.py,sha256=7RDNTV2jK5b5PhZz-v-kpYGTDCXu8FBgX
|
|
56
57
|
natural_pdf/exporters/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
57
58
|
natural_pdf/exporters/data/pdf.ttf,sha256=x4RUIJJaI9iO2DCmOVe4r4Wmao2vjZ_JDoQ2c7LvGlk,572
|
58
59
|
natural_pdf/exporters/data/sRGB.icc,sha256=KpLUuuRQt22LCqQhk9-XTXX2Jzjs6_dPAcXnWxKpV5Y,6922
|
59
|
-
natural_pdf/extraction/manager.py,sha256=
|
60
|
-
natural_pdf/extraction/mixin.py,sha256=
|
60
|
+
natural_pdf/extraction/manager.py,sha256=R-wGe9PGky6r4BTSUPMXf3N2l12kycku3GJKEd45eFU,4701
|
61
|
+
natural_pdf/extraction/mixin.py,sha256=dBcp96R8zMQqaRHiB8vpyad8GR89gv5RPXlr8Mt0ais,25427
|
61
62
|
natural_pdf/extraction/result.py,sha256=PDaCCN2LQBbHsZy0_lrQ0ROeMsnmH1WRoXWOjk9M2o4,1825
|
62
63
|
natural_pdf/flows/__init__.py,sha256=cUN4A8hTDLZSRr4PO2W_lR4z6hWpbNG8Seox-IIcrLU,277
|
63
64
|
natural_pdf/flows/collections.py,sha256=ErkHWdX6W_y1SjkcA_bGM0uUYRGPWWpRkHip6LHpej0,25740
|
@@ -84,9 +85,9 @@ natural_pdf/search/search_options.py,sha256=sq_e8_jSROicD94b_xtDtLnjEr_Zsy4icjzP
|
|
84
85
|
natural_pdf/search/search_service_protocol.py,sha256=u8pbuWP96fnQEe6mnreY9DrdiDAHP6ZCY7phvSbFlP8,6697
|
85
86
|
natural_pdf/search/searchable_mixin.py,sha256=hqQ_AuID5eTGRCtKYdFLZ1zF35y73uk3x1M1VW9Il8U,23514
|
86
87
|
natural_pdf/selectors/__init__.py,sha256=oZGeqSv53EqmIZOhcnawuaGGlRg1h79vArXuZCWKm4A,123
|
87
|
-
natural_pdf/selectors/parser.py,sha256=
|
88
|
+
natural_pdf/selectors/parser.py,sha256=pw0M8ICKPMOzZPzWpLsQMG_lnl8PewGIdIG3ciukabk,38877
|
88
89
|
natural_pdf/tables/__init__.py,sha256=sCvCGbGsL6BiqlNxAYfVv003bIDLI11FmjHhaWfcU6w,104
|
89
|
-
natural_pdf/tables/result.py,sha256=
|
90
|
+
natural_pdf/tables/result.py,sha256=1pcelNZvOb6Anlwj08Z1XU-YK1ihlCsLpYMRA3Zc4JM,7242
|
90
91
|
natural_pdf/templates/__init__.py,sha256=jYBxzfi73vew0f6yhIh1MlRxw4F_TVN2hKQR0YXOFe0,20
|
91
92
|
natural_pdf/utils/__init__.py,sha256=s3M8FggaK1P3EBYn6R_-HgSDjNc9C73gyKe1hihtNWg,43
|
92
93
|
natural_pdf/utils/bidi_mirror.py,sha256=jJEES0xDrMfo5Me8kHMxHv4COS51PitnYi2EvKv3HCE,1151
|
@@ -101,14 +102,14 @@ natural_pdf/utils/text_extraction.py,sha256=CCwPTmMoTgtQt2P00X_ADIf6ZGNfxvjCO9FO
|
|
101
102
|
natural_pdf/utils/visualization.py,sha256=zhZEHgYnZFuX7YxTHXF8Y3D97uHp2beTKMaC-JkCFwk,22364
|
102
103
|
natural_pdf/widgets/__init__.py,sha256=QTVaUmsw__FCweFYZebwPssQxxUFUMd0wpm_cUbGZJY,181
|
103
104
|
natural_pdf/widgets/viewer.py,sha256=KW3JogdR2TMg2ECUMYp8hwd060hfg8EsYBWxb5IEzBY,24942
|
104
|
-
natural_pdf-0.2.0.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
|
105
|
+
natural_pdf-0.2.2.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
|
105
106
|
optimization/memory_comparison.py,sha256=0i_foFSRmppj-fY069qjwH36s_zkx-1L2ASAAlepWzA,6541
|
106
107
|
optimization/pdf_analyzer.py,sha256=HjrmTgu2qchxPeDckc5kjgxppGwd40UESrYS9Myj7pY,19352
|
107
|
-
optimization/performance_analysis.py,sha256=
|
108
|
+
optimization/performance_analysis.py,sha256=JBXnR9hc7Ix7YCnt3EJPSpsyqIUgKsc7GEffQ_TDCBk,13033
|
108
109
|
optimization/test_cleanup_methods.py,sha256=PmLOL4MRgvV0j_DW9W1TS8MsGGgu57QCuq6_5y7zK3s,6209
|
109
110
|
optimization/test_memory_fix.py,sha256=A3knK74fNhvHknDbLhbTmA276x1ifl-3ivJ_7BhVSTI,6170
|
110
111
|
tools/bad_pdf_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
111
|
-
tools/bad_pdf_eval/analyser.py,sha256=
|
112
|
+
tools/bad_pdf_eval/analyser.py,sha256=oqSTo3NLyignp_XdCO9_SRCUUXMU8lfgDavKYZYNxws,13690
|
112
113
|
tools/bad_pdf_eval/collate_summaries.py,sha256=L_YsdiqmwGIHYWTVJqo6gyazyn3GIQgpfGGKk8uwckk,5159
|
113
114
|
tools/bad_pdf_eval/compile_attempts_markdown.py,sha256=ArFDZaSa9dz0ez0lsNlbUSK4hbvB3___DlfwqPEAZpY,4359
|
114
115
|
tools/bad_pdf_eval/eval_suite.py,sha256=zcapsGwO-VJ2OupJnPYKbrkzvzdGdoh2DZPK19bfkQg,4450
|
@@ -118,8 +119,8 @@ tools/bad_pdf_eval/llm_enrich.py,sha256=mCh4KGi1HmIkzGjj5rrHz1Osd7sEX1IZ_FW08H1t
|
|
118
119
|
tools/bad_pdf_eval/llm_enrich_with_retry.py,sha256=XUtPF1hUvqd3frDXT0wDTXoonuAivhjM5vgFdZ-tm0A,9373
|
119
120
|
tools/bad_pdf_eval/reporter.py,sha256=e1g__mkSB4q02p3mGWOwMhvFs7F2HJosNBxup0-LkyU,400
|
120
121
|
tools/bad_pdf_eval/utils.py,sha256=hR95XQ7qf7Cu6BdyX0L7ggGVx-ah5sK0jHWblTJUUic,4896
|
121
|
-
natural_pdf-0.2.
|
122
|
-
natural_pdf-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
123
|
-
natural_pdf-0.2.0.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
|
124
|
-
natural_pdf-0.2.
|
125
|
-
natural_pdf-0.2.0.dist-info/RECORD,,
|
122
|
+
natural_pdf-0.2.2.dist-info/METADATA,sha256=uLGyhgV-iSjcvpvaj9s8ArQUzg1UTAF6bPXTf4BuZSE,6959
|
123
|
+
natural_pdf-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
124
|
+
natural_pdf-0.2.2.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
|
125
|
+
natural_pdf-0.2.2.dist-info/top_level.txt,sha256=80t0F2ZeX4vN4Ke5iTflcOk_PN_0USn33ha3X6X86Ik,36
|
126
|
+
natural_pdf-0.2.2.dist-info/RECORD,,
|
tools/bad_pdf_eval/analyser.py
CHANGED
@@ -39,7 +39,7 @@ class BadPDFAnalyzer:
|
|
39
39
|
# ---------------------------------------------------------------------
|
40
40
|
def _save_page_image(self, page, page_num: int) -> Path:
|
41
41
|
"""Render and save page image as high-quality JPG."""
|
42
|
-
img: Image.Image = page.
|
42
|
+
img: Image.Image = page.render(resolution=self.resolution)
|
43
43
|
if img.mode != "RGB":
|
44
44
|
img = img.convert("RGB")
|
45
45
|
img_path = self.output_dir / f"page_{page_num:04d}.jpg"
|
File without changes
|
File without changes
|
File without changes
|