magic-pdf 1.3.6__py3-none-any.whl → 1.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
magic_pdf/libs/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.3.6"
1
+ __version__ = "1.3.7"
@@ -161,20 +161,13 @@ class BatchAnalyze:
161
161
  for table_res_dict in tqdm(table_res_list_all_page, desc="Table Predict"):
162
162
  _lang = table_res_dict['lang']
163
163
  atom_model_manager = AtomModelSingleton()
164
- ocr_engine = atom_model_manager.get_atom_model(
165
- atom_model_name='ocr',
166
- ocr_show_log=False,
167
- det_db_box_thresh=0.5,
168
- det_db_unclip_ratio=1.6,
169
- lang=_lang
170
- )
171
164
  table_model = atom_model_manager.get_atom_model(
172
165
  atom_model_name='table',
173
166
  table_model_name='rapid_table',
174
167
  table_model_path='',
175
168
  table_max_time=400,
176
169
  device='cpu',
177
- ocr_engine=ocr_engine,
170
+ lang=_lang,
178
171
  table_sub_model_name='slanet_plus'
179
172
  )
180
173
  html_code, table_cell_bboxes, logic_points, elapse = table_model.predict(table_res_dict['table_img'])
@@ -53,6 +53,11 @@ class PytorchPaddleOCR(TextSystem):
53
53
  args = parser.parse_args(args)
54
54
 
55
55
  self.lang = kwargs.get('lang', 'ch')
56
+
57
+ device = get_device()
58
+ if device == 'cpu' and self.lang == 'ch':
59
+ self.lang = 'ch_lite'
60
+
56
61
  if self.lang in latin_lang:
57
62
  self.lang = 'latin'
58
63
  elif self.lang in arabic_lang:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: magic-pdf
3
- Version: 1.3.6
3
+ Version: 1.3.7
4
4
  Summary: A practical tool for converting PDF to Markdown
5
5
  License: AGPL-3.0
6
6
  Project-URL: Home, https://mineru.net/
@@ -107,6 +107,9 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte
107
107
  </div>
108
108
 
109
109
  # Changelog
110
+ - 2025/04/22 1.3.7 Released
111
+ - Fixed the issue where the `lang` parameter was ineffective during table parsing model initialization.
112
+ - Fixed the significant slowdown in OCR and table parsing speed in `cpu` mode.
110
113
  - 2025/04/16 1.3.4 Released
111
114
  - Slightly improved the speed of OCR detection by removing some unused blocks.
112
115
  - Fixed page-level sorting errors caused by footnotes in certain cases.
@@ -52,9 +52,9 @@ magic_pdf/libs/pdf_check.py,sha256=7GWWvDR6g_rj_fE6XJlbTq5AFVX11ngRIzT0N18F214,3
52
52
  magic_pdf/libs/pdf_image_tools.py,sha256=_au7plmKKctpPKozBumSKgP8689q4vH1mU8VMLO0IbM,2260
53
53
  magic_pdf/libs/performance_stats.py,sha256=DW-c6nUTUnWKGTONRKfpucsYZm1ake016F9K7jJwbik,2136
54
54
  magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
55
- magic_pdf/libs/version.py,sha256=5ZbAQtod5QalTI1C2N07edlxplzG_Q2XvGOSyOok4uA,22
55
+ magic_pdf/libs/version.py,sha256=9peaXOar2qezOPJEKG6cD_A0aaXrzdVN8h-v6fBoBEk,22
56
56
  magic_pdf/model/__init__.py,sha256=sa-dO2k-TLy25I2gRrzjm_cQeYfzMf-pLwBJHkIxGo0,51
57
- magic_pdf/model/batch_analyze.py,sha256=yKhKQuZTh9GG83p61bw2BRqKMbnsjsmX73gfuTRk8xE,11272
57
+ magic_pdf/model/batch_analyze.py,sha256=F0WsjbQ6z9txdiUiVy6n6zhyJWJ-4moljNx8fe8HFws,10977
58
58
  magic_pdf/model/doc_analyze_by_custom_model.py,sha256=-cjn7DQi6kZCqVZ0IxbXuL2kmeGhSVLzLaezIHPFzMU,10317
59
59
  magic_pdf/model/magic_model.py,sha256=yZKWo_wRck_-YLyFGRiUHGar8sV1Y6458BFLbyBAt74,30682
60
60
  magic_pdf/model/model_list.py,sha256=aqfEJlEfbib3D3ISrxc0Coh6SbffYh8Yq2FlQN35_zA,213
@@ -109,7 +109,7 @@ magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unim
109
109
  magic_pdf/model/sub_modules/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
110
110
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
111
111
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/ocr_utils.py,sha256=3qxu0lAjqzZQ2Ci-C_wz_YSakyq_5-KnckA3-5bICTM,12589
112
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py,sha256=FaPo02L1IJKybGYfydsohOiHstJIL8d5UKzGck2tYvk,7283
112
+ magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py,sha256=zCQ9cv8yBwTNELakqYXb3KJ0buj7tocByOY4xjXOG4U,7399
113
113
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
114
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py,sha256=5bI7MAu65r-vn28krwdJ6pjZMkEvWjspE7EQaTsRERw,1319
115
115
  magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py,sha256=YYu3c-W4fgEErxxDM98uQ3oWwPEh-6w75LY4zcj4VtM,199
@@ -195,9 +195,9 @@ magic_pdf/tools/common.py,sha256=-x0RSFr7SNbdYq7DntaLYmQmaxyF-xKSf4xMpSUTzA0,126
195
195
  magic_pdf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
196
196
  magic_pdf/utils/annotations.py,sha256=82ou3uELNbQWa9hOFFkVt0gsIskAKf5msCv5J2IJ5V0,211
197
197
  magic_pdf/utils/office_to_pdf.py,sha256=bFRYe6v3-pfx5R8-bV8cmf12jPnOrYZsleKoECTXzbM,3958
198
- magic_pdf-1.3.6.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
199
- magic_pdf-1.3.6.dist-info/METADATA,sha256=WciKDOUh2xH6OdTqDPJlMlLrUhaHBseO5dycx0_7RLo,45798
200
- magic_pdf-1.3.6.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
201
- magic_pdf-1.3.6.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
202
- magic_pdf-1.3.6.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
203
- magic_pdf-1.3.6.dist-info/RECORD,,
198
+ magic_pdf-1.3.7.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
199
+ magic_pdf-1.3.7.dist-info/METADATA,sha256=9JPxH4h9dqV4n6TU7L__FCFCDEGf495v5X4uN4rvXMs,46013
200
+ magic_pdf-1.3.7.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
201
+ magic_pdf-1.3.7.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
202
+ magic_pdf-1.3.7.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
203
+ magic_pdf-1.3.7.dist-info/RECORD,,