natural-pdf 0.2.1.dev0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.2.1.dev0
3
+ Version: 0.2.2
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -14,7 +14,7 @@ License-File: LICENSE
14
14
  Requires-Dist: scikit-learn
15
15
  Requires-Dist: markdown
16
16
  Requires-Dist: pandas
17
- Requires-Dist: pdfplumber
17
+ Requires-Dist: pdfplumber>=0.11.7
18
18
  Requires-Dist: colormath2
19
19
  Requires-Dist: pillow
20
20
  Requires-Dist: colour
@@ -2,7 +2,7 @@ natural_pdf/__init__.py,sha256=N4pR0LbuPEnUYFZqbdVqc_FGKldgwPQc1wjJhYKTBBM,3417
2
2
  natural_pdf/cli.py,sha256=SkPwhhMM-GhLsj3O1n1Agxz4KOxcZ08sj8hVQSFJB5c,4064
3
3
  natural_pdf/text_mixin.py,sha256=eFCiHj6Okcw3aum4955BepcI2NPRalkf9UFFVTc_H30,4012
4
4
  natural_pdf/analyzers/__init__.py,sha256=3XGoNq3OgiVkZP7tOdeP5XVUl7fDgyztdA8DlOcMLXg,1138
5
- natural_pdf/analyzers/guides.py,sha256=N8fetR3jrDXzeHtIlbxg8BEbthB_lS0L8yhzVXHqiGQ,143245
5
+ natural_pdf/analyzers/guides.py,sha256=9FUbxk4XBOyktXgq9q5-bB949JFrzT1kBPikg2ENoIw,150032
6
6
  natural_pdf/analyzers/shape_detection_mixin.py,sha256=mgpyJ4jIulz9l9HCqThabJIsLSrXh9BB2AmLxUoHmw0,62584
7
7
  natural_pdf/analyzers/text_options.py,sha256=qEkDaYWla0rIM_gszEOsu52q7C_dAfV81P2HLJZM2sw,3333
8
8
  natural_pdf/analyzers/text_structure.py,sha256=3WWusi-BI0krUnJxB05DD6XmKj5qRNvQBqH7zOQGm1M,28451
@@ -26,25 +26,26 @@ natural_pdf/classification/results.py,sha256=5ha77CxK0GYwkBMJbvUBZkBjsL5GpOveIZD
26
26
  natural_pdf/collections/mixins.py,sha256=u4KtnlUZZYQ74e0OXAniOv9RtuA6FhwBxsLMJLjdbpQ,5169
27
27
  natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
28
28
  natural_pdf/core/element_manager.py,sha256=DRZvntd99wjXy6KeDjCq5uRhjMftZop9QklOZqlUH8M,55349
29
- natural_pdf/core/highlighting_service.py,sha256=k_SMCINeK4aUwfQLmaiyipCPL8vv33ibrCyqtlni8Bc,67921
30
- natural_pdf/core/page.py,sha256=nQDUR4eKsUhPmEnofjmJRPITQ1RJoK3ITC0Lrtt4AHw,135510
31
- natural_pdf/core/page_collection.py,sha256=9ff7IfO04bUkJCBZv__Z9G8A-NY7mR3OujVl54lH-FE,50985
29
+ natural_pdf/core/highlighting_service.py,sha256=mhHEomIlHQM1lVmEcUMJ4xtHsvCRb3rMroW1d1Gqs-M,67942
30
+ natural_pdf/core/page.py,sha256=M_V0IXahopIN45ENmvIm4m_WdnrjYECmXe7xhUQtjQI,142455
31
+ natural_pdf/core/page_collection.py,sha256=coVUsp4uLR2GImLbuGFpBIYcU952eJLfBQNgTmkOSzU,52486
32
+ natural_pdf/core/page_groupby.py,sha256=550ME6kd-h-2u75oUIIIqTYsmh8VvdQO1nXXioL8J6A,7378
32
33
  natural_pdf/core/pdf.py,sha256=q54DyhXwAS_zAmsBd3PsCezu1wyQOYmGmB3iKfP8gAM,101884
33
34
  natural_pdf/core/pdf_collection.py,sha256=8tM0qVWS1L5Hwv5cXuZ2X8znAYOjKmlERX62bksDlJU,30144
34
- natural_pdf/core/render_spec.py,sha256=j77UrHA_g_e0RbAyn-4hkjFtqm_oaTe5KRd_Ii9izf4,12243
35
+ natural_pdf/core/render_spec.py,sha256=SgT6bHR3yduZ_-JhFWRFmakUD74NPQJ8q1lY6iB3prQ,12916
35
36
  natural_pdf/describe/__init__.py,sha256=kIV7ORmWWB1SAur7nK2aAwR-wHqSedhKfUsaUl4hG0A,586
36
37
  natural_pdf/describe/base.py,sha256=Of9WVo9XuShXoeyJr0RN2CpLhF_CeiOjazl-or53RKU,18173
37
38
  natural_pdf/describe/elements.py,sha256=JicXC9SJmmasqxalpCXA47-kVwv-6JnR3Xiu778aNHM,12634
38
39
  natural_pdf/describe/mixin.py,sha256=rkX14aGrSz7Jvxx8Rbxv3eSfbO-_29DipwpstrV2pDQ,3109
39
40
  natural_pdf/describe/summary.py,sha256=cfT4ZQkeatCDAOwWPwhtEVXisNgk6E57fAXAnoRysSU,7645
40
41
  natural_pdf/elements/__init__.py,sha256=ICNikmLeIEuSYypz-KnkBn8xR1hR7rge4hsa1KLkyWY,42
41
- natural_pdf/elements/base.py,sha256=jEBw5cq4mzgOYeEBrWPml2RBuVmOnwBNA4nTd7pLmMI,52292
42
- natural_pdf/elements/element_collection.py,sha256=av2YKTxEB5lHYqw1A6aYoN-Uef2qzT9z6ibBAbJMPo4,101322
42
+ natural_pdf/elements/base.py,sha256=WUwYDzeeGNkr26lWKm8PqGlW9WQIPoteWYIpvlcxTrs,53939
43
+ natural_pdf/elements/element_collection.py,sha256=uQoZ2GFCnru0LCiv5zr6wIu2IWgM0j2m44qjsJPNPbk,101340
43
44
  natural_pdf/elements/image.py,sha256=zu-P2Y8fRoEXf6IeZU0EYRWsgZ6I_a5vy1FA3VXTGkQ,1424
44
45
  natural_pdf/elements/line.py,sha256=TFn7KXjPT_jUQyQyabU0F7XYU4dC-qadwodJMZF4DCU,3844
45
46
  natural_pdf/elements/rect.py,sha256=0lNkVkPkvbRbrFED856RXoUcTcDkeeOIs5xldKGAQT8,3324
46
- natural_pdf/elements/region.py,sha256=PoT4e2s0gPkMa2Px0LjkThi-Jc8O0_ebl6U7UYADAQk,155289
47
- natural_pdf/elements/text.py,sha256=IyyU3G4F3OzNZ4Oo0BTK_Wq0p0xFj5EYBWNVL4SZ-BQ,20492
47
+ natural_pdf/elements/region.py,sha256=ClL2vxx2aVoAecaAlkUDZ2ygvUiP8oTa-xfIclm2Eg8,155286
48
+ natural_pdf/elements/text.py,sha256=829uSJv9E-8cC6T6iR_Va7Xtv54pJoyRN78fq4NN1d4,20687
48
49
  natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
49
50
  natural_pdf/exporters/__init__.py,sha256=QffoARekR6WzXEd05oxOytly4qPdBizuIF-SUkeFpig,643
50
51
  natural_pdf/exporters/base.py,sha256=379sioW_hbkGb21sEVuJhbkkDO5MFsFtTUNO5TgG2YU,2101
@@ -56,8 +57,8 @@ natural_pdf/exporters/searchable_pdf.py,sha256=7RDNTV2jK5b5PhZz-v-kpYGTDCXu8FBgX
56
57
  natural_pdf/exporters/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
57
58
  natural_pdf/exporters/data/pdf.ttf,sha256=x4RUIJJaI9iO2DCmOVe4r4Wmao2vjZ_JDoQ2c7LvGlk,572
58
59
  natural_pdf/exporters/data/sRGB.icc,sha256=KpLUuuRQt22LCqQhk9-XTXX2Jzjs6_dPAcXnWxKpV5Y,6922
59
- natural_pdf/extraction/manager.py,sha256=sASPJZ5cWFsl8A4PyTjg2yqkyC00tRl6glfoFA6HcsM,4979
60
- natural_pdf/extraction/mixin.py,sha256=ck2e48BYZg5RNderNE0QST6RSn2D6mIZYBw91nMSgp8,24970
60
+ natural_pdf/extraction/manager.py,sha256=R-wGe9PGky6r4BTSUPMXf3N2l12kycku3GJKEd45eFU,4701
61
+ natural_pdf/extraction/mixin.py,sha256=dBcp96R8zMQqaRHiB8vpyad8GR89gv5RPXlr8Mt0ais,25427
61
62
  natural_pdf/extraction/result.py,sha256=PDaCCN2LQBbHsZy0_lrQ0ROeMsnmH1WRoXWOjk9M2o4,1825
62
63
  natural_pdf/flows/__init__.py,sha256=cUN4A8hTDLZSRr4PO2W_lR4z6hWpbNG8Seox-IIcrLU,277
63
64
  natural_pdf/flows/collections.py,sha256=ErkHWdX6W_y1SjkcA_bGM0uUYRGPWWpRkHip6LHpej0,25740
@@ -84,9 +85,9 @@ natural_pdf/search/search_options.py,sha256=sq_e8_jSROicD94b_xtDtLnjEr_Zsy4icjzP
84
85
  natural_pdf/search/search_service_protocol.py,sha256=u8pbuWP96fnQEe6mnreY9DrdiDAHP6ZCY7phvSbFlP8,6697
85
86
  natural_pdf/search/searchable_mixin.py,sha256=hqQ_AuID5eTGRCtKYdFLZ1zF35y73uk3x1M1VW9Il8U,23514
86
87
  natural_pdf/selectors/__init__.py,sha256=oZGeqSv53EqmIZOhcnawuaGGlRg1h79vArXuZCWKm4A,123
87
- natural_pdf/selectors/parser.py,sha256=uWo0K4uWJFbD4kTXz9fOcPwEjs7cGR9Mfpm1jm7qKUM,38824
88
+ natural_pdf/selectors/parser.py,sha256=pw0M8ICKPMOzZPzWpLsQMG_lnl8PewGIdIG3ciukabk,38877
88
89
  natural_pdf/tables/__init__.py,sha256=sCvCGbGsL6BiqlNxAYfVv003bIDLI11FmjHhaWfcU6w,104
89
- natural_pdf/tables/result.py,sha256=lfhLs5OxZ2IRLNndb8zjOQBk1SPjHx4KePzI7GlRkMg,5478
90
+ natural_pdf/tables/result.py,sha256=1pcelNZvOb6Anlwj08Z1XU-YK1ihlCsLpYMRA3Zc4JM,7242
90
91
  natural_pdf/templates/__init__.py,sha256=jYBxzfi73vew0f6yhIh1MlRxw4F_TVN2hKQR0YXOFe0,20
91
92
  natural_pdf/utils/__init__.py,sha256=s3M8FggaK1P3EBYn6R_-HgSDjNc9C73gyKe1hihtNWg,43
92
93
  natural_pdf/utils/bidi_mirror.py,sha256=jJEES0xDrMfo5Me8kHMxHv4COS51PitnYi2EvKv3HCE,1151
@@ -101,14 +102,14 @@ natural_pdf/utils/text_extraction.py,sha256=CCwPTmMoTgtQt2P00X_ADIf6ZGNfxvjCO9FO
101
102
  natural_pdf/utils/visualization.py,sha256=zhZEHgYnZFuX7YxTHXF8Y3D97uHp2beTKMaC-JkCFwk,22364
102
103
  natural_pdf/widgets/__init__.py,sha256=QTVaUmsw__FCweFYZebwPssQxxUFUMd0wpm_cUbGZJY,181
103
104
  natural_pdf/widgets/viewer.py,sha256=KW3JogdR2TMg2ECUMYp8hwd060hfg8EsYBWxb5IEzBY,24942
104
- natural_pdf-0.2.1.dev0.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
105
+ natural_pdf-0.2.2.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
105
106
  optimization/memory_comparison.py,sha256=0i_foFSRmppj-fY069qjwH36s_zkx-1L2ASAAlepWzA,6541
106
107
  optimization/pdf_analyzer.py,sha256=HjrmTgu2qchxPeDckc5kjgxppGwd40UESrYS9Myj7pY,19352
107
- optimization/performance_analysis.py,sha256=RjAqeE3YS1r_7qTWkY6Ng5YMbb6MXJXfXX6LoVjg_xQ,13035
108
+ optimization/performance_analysis.py,sha256=JBXnR9hc7Ix7YCnt3EJPSpsyqIUgKsc7GEffQ_TDCBk,13033
108
109
  optimization/test_cleanup_methods.py,sha256=PmLOL4MRgvV0j_DW9W1TS8MsGGgu57QCuq6_5y7zK3s,6209
109
110
  optimization/test_memory_fix.py,sha256=A3knK74fNhvHknDbLhbTmA276x1ifl-3ivJ_7BhVSTI,6170
110
111
  tools/bad_pdf_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
111
- tools/bad_pdf_eval/analyser.py,sha256=bKUT3muP3ESE5i1D8sGyAS5tMzFMcq-i-xD_ZeUxYhY,13692
112
+ tools/bad_pdf_eval/analyser.py,sha256=oqSTo3NLyignp_XdCO9_SRCUUXMU8lfgDavKYZYNxws,13690
112
113
  tools/bad_pdf_eval/collate_summaries.py,sha256=L_YsdiqmwGIHYWTVJqo6gyazyn3GIQgpfGGKk8uwckk,5159
113
114
  tools/bad_pdf_eval/compile_attempts_markdown.py,sha256=ArFDZaSa9dz0ez0lsNlbUSK4hbvB3___DlfwqPEAZpY,4359
114
115
  tools/bad_pdf_eval/eval_suite.py,sha256=zcapsGwO-VJ2OupJnPYKbrkzvzdGdoh2DZPK19bfkQg,4450
@@ -118,8 +119,8 @@ tools/bad_pdf_eval/llm_enrich.py,sha256=mCh4KGi1HmIkzGjj5rrHz1Osd7sEX1IZ_FW08H1t
118
119
  tools/bad_pdf_eval/llm_enrich_with_retry.py,sha256=XUtPF1hUvqd3frDXT0wDTXoonuAivhjM5vgFdZ-tm0A,9373
119
120
  tools/bad_pdf_eval/reporter.py,sha256=e1g__mkSB4q02p3mGWOwMhvFs7F2HJosNBxup0-LkyU,400
120
121
  tools/bad_pdf_eval/utils.py,sha256=hR95XQ7qf7Cu6BdyX0L7ggGVx-ah5sK0jHWblTJUUic,4896
121
- natural_pdf-0.2.1.dev0.dist-info/METADATA,sha256=A8hOXH7KhQgMTCKN0keud9u2m9V-_RnWPWjaSBo7Luc,6956
122
- natural_pdf-0.2.1.dev0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
123
- natural_pdf-0.2.1.dev0.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
124
- natural_pdf-0.2.1.dev0.dist-info/top_level.txt,sha256=80t0F2ZeX4vN4Ke5iTflcOk_PN_0USn33ha3X6X86Ik,36
125
- natural_pdf-0.2.1.dev0.dist-info/RECORD,,
122
+ natural_pdf-0.2.2.dist-info/METADATA,sha256=uLGyhgV-iSjcvpvaj9s8ArQUzg1UTAF6bPXTf4BuZSE,6959
123
+ natural_pdf-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
124
+ natural_pdf-0.2.2.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
125
+ natural_pdf-0.2.2.dist-info/top_level.txt,sha256=80t0F2ZeX4vN4Ke5iTflcOk_PN_0USn33ha3X6X86Ik,36
126
+ natural_pdf-0.2.2.dist-info/RECORD,,
@@ -211,7 +211,7 @@ class PDFPerformanceTester:
211
211
 
212
212
  for resolution in resolutions:
213
213
  try:
214
- img = page.to_image(resolution=resolution)
214
+ img = page.render(resolution=resolution)
215
215
 
216
216
  self.profiler.take_snapshot(
217
217
  f"image_{resolution}dpi_{i+1}",
@@ -39,7 +39,7 @@ class BadPDFAnalyzer:
39
39
  # ---------------------------------------------------------------------
40
40
  def _save_page_image(self, page, page_num: int) -> Path:
41
41
  """Render and save page image as high-quality JPG."""
42
- img: Image.Image = page.to_image(resolution=self.resolution)
42
+ img: Image.Image = page.render(resolution=self.resolution)
43
43
  if img.mode != "RGB":
44
44
  img = img.convert("RGB")
45
45
  img_path = self.output_dir / f"page_{page_num:04d}.jpg"