magic-pdf 1.3.6__py3-none-any.whl → 1.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- magic_pdf/libs/version.py +1 -1
- magic_pdf/model/batch_analyze.py +1 -8
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py +5 -0
- {magic_pdf-1.3.6.dist-info → magic_pdf-1.3.7.dist-info}/METADATA +4 -1
- {magic_pdf-1.3.6.dist-info → magic_pdf-1.3.7.dist-info}/RECORD +9 -9
- {magic_pdf-1.3.6.dist-info → magic_pdf-1.3.7.dist-info}/LICENSE.md +0 -0
- {magic_pdf-1.3.6.dist-info → magic_pdf-1.3.7.dist-info}/WHEEL +0 -0
- {magic_pdf-1.3.6.dist-info → magic_pdf-1.3.7.dist-info}/entry_points.txt +0 -0
- {magic_pdf-1.3.6.dist-info → magic_pdf-1.3.7.dist-info}/top_level.txt +0 -0
magic_pdf/libs/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "1.3.6"
|
1
|
+
__version__ = "1.3.7"
|
magic_pdf/model/batch_analyze.py
CHANGED
@@ -161,20 +161,13 @@ class BatchAnalyze:
|
|
161
161
|
for table_res_dict in tqdm(table_res_list_all_page, desc="Table Predict"):
|
162
162
|
_lang = table_res_dict['lang']
|
163
163
|
atom_model_manager = AtomModelSingleton()
|
164
|
-
ocr_engine = atom_model_manager.get_atom_model(
|
165
|
-
atom_model_name='ocr',
|
166
|
-
ocr_show_log=False,
|
167
|
-
det_db_box_thresh=0.5,
|
168
|
-
det_db_unclip_ratio=1.6,
|
169
|
-
lang=_lang
|
170
|
-
)
|
171
164
|
table_model = atom_model_manager.get_atom_model(
|
172
165
|
atom_model_name='table',
|
173
166
|
table_model_name='rapid_table',
|
174
167
|
table_model_path='',
|
175
168
|
table_max_time=400,
|
176
169
|
device='cpu',
|
177
|
-
|
170
|
+
lang=_lang,
|
178
171
|
table_sub_model_name='slanet_plus'
|
179
172
|
)
|
180
173
|
html_code, table_cell_bboxes, logic_points, elapse = table_model.predict(table_res_dict['table_img'])
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py
CHANGED
@@ -53,6 +53,11 @@ class PytorchPaddleOCR(TextSystem):
|
|
53
53
|
args = parser.parse_args(args)
|
54
54
|
|
55
55
|
self.lang = kwargs.get('lang', 'ch')
|
56
|
+
|
57
|
+
device = get_device()
|
58
|
+
if device == 'cpu' and self.lang == 'ch':
|
59
|
+
self.lang = 'ch_lite'
|
60
|
+
|
56
61
|
if self.lang in latin_lang:
|
57
62
|
self.lang = 'latin'
|
58
63
|
elif self.lang in arabic_lang:
|
{magic_pdf-1.3.6.dist-info → magic_pdf-1.3.7.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: magic-pdf
|
3
|
-
Version: 1.3.6
|
3
|
+
Version: 1.3.7
|
4
4
|
Summary: A practical tool for converting PDF to Markdown
|
5
5
|
License: AGPL-3.0
|
6
6
|
Project-URL: Home, https://mineru.net/
|
@@ -107,6 +107,9 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte
|
|
107
107
|
</div>
|
108
108
|
|
109
109
|
# Changelog
|
110
|
+
- 2025/04/22 1.3.7 Released
|
111
|
+
- Fixed the issue where the `lang` parameter was ineffective during table parsing model initialization.
|
112
|
+
- Fixed the significant slowdown in OCR and table parsing speed in `cpu` mode.
|
110
113
|
- 2025/04/16 1.3.4 Released
|
111
114
|
- Slightly improved the speed of OCR detection by removing some unused blocks.
|
112
115
|
- Fixed page-level sorting errors caused by footnotes in certain cases.
|
{magic_pdf-1.3.6.dist-info → magic_pdf-1.3.7.dist-info}/RECORD
CHANGED
@@ -52,9 +52,9 @@ magic_pdf/libs/pdf_check.py,sha256=7GWWvDR6g_rj_fE6XJlbTq5AFVX11ngRIzT0N18F214,3
|
|
52
52
|
magic_pdf/libs/pdf_image_tools.py,sha256=_au7plmKKctpPKozBumSKgP8689q4vH1mU8VMLO0IbM,2260
|
53
53
|
magic_pdf/libs/performance_stats.py,sha256=DW-c6nUTUnWKGTONRKfpucsYZm1ake016F9K7jJwbik,2136
|
54
54
|
magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
|
55
|
-
magic_pdf/libs/version.py,sha256=
|
55
|
+
magic_pdf/libs/version.py,sha256=9peaXOar2qezOPJEKG6cD_A0aaXrzdVN8h-v6fBoBEk,22
|
56
56
|
magic_pdf/model/__init__.py,sha256=sa-dO2k-TLy25I2gRrzjm_cQeYfzMf-pLwBJHkIxGo0,51
|
57
|
-
magic_pdf/model/batch_analyze.py,sha256=
|
57
|
+
magic_pdf/model/batch_analyze.py,sha256=F0WsjbQ6z9txdiUiVy6n6zhyJWJ-4moljNx8fe8HFws,10977
|
58
58
|
magic_pdf/model/doc_analyze_by_custom_model.py,sha256=-cjn7DQi6kZCqVZ0IxbXuL2kmeGhSVLzLaezIHPFzMU,10317
|
59
59
|
magic_pdf/model/magic_model.py,sha256=yZKWo_wRck_-YLyFGRiUHGar8sV1Y6458BFLbyBAt74,30682
|
60
60
|
magic_pdf/model/model_list.py,sha256=aqfEJlEfbib3D3ISrxc0Coh6SbffYh8Yq2FlQN35_zA,213
|
@@ -109,7 +109,7 @@ magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unim
|
|
109
109
|
magic_pdf/model/sub_modules/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
110
110
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
111
111
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/ocr_utils.py,sha256=3qxu0lAjqzZQ2Ci-C_wz_YSakyq_5-KnckA3-5bICTM,12589
|
112
|
-
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py,sha256=
|
112
|
+
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py,sha256=zCQ9cv8yBwTNELakqYXb3KJ0buj7tocByOY4xjXOG4U,7399
|
113
113
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
114
114
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py,sha256=5bI7MAu65r-vn28krwdJ6pjZMkEvWjspE7EQaTsRERw,1319
|
115
115
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py,sha256=YYu3c-W4fgEErxxDM98uQ3oWwPEh-6w75LY4zcj4VtM,199
|
@@ -195,9 +195,9 @@ magic_pdf/tools/common.py,sha256=-x0RSFr7SNbdYq7DntaLYmQmaxyF-xKSf4xMpSUTzA0,126
|
|
195
195
|
magic_pdf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
196
196
|
magic_pdf/utils/annotations.py,sha256=82ou3uELNbQWa9hOFFkVt0gsIskAKf5msCv5J2IJ5V0,211
|
197
197
|
magic_pdf/utils/office_to_pdf.py,sha256=bFRYe6v3-pfx5R8-bV8cmf12jPnOrYZsleKoECTXzbM,3958
|
198
|
-
magic_pdf-1.3.6.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
|
199
|
-
magic_pdf-1.3.
|
200
|
-
magic_pdf-1.3.6.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
201
|
-
magic_pdf-1.3.6.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
|
202
|
-
magic_pdf-1.3.6.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
|
203
|
-
magic_pdf-1.3.6.dist-info/RECORD,,
|
198
|
+
magic_pdf-1.3.7.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
|
199
|
+
magic_pdf-1.3.7.dist-info/METADATA,sha256=9JPxH4h9dqV4n6TU7L__FCFCDEGf495v5X4uN4rvXMs,46013
|
200
|
+
magic_pdf-1.3.7.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
201
|
+
magic_pdf-1.3.7.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
|
202
|
+
magic_pdf-1.3.7.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
|
203
|
+
magic_pdf-1.3.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|