magic-pdf 1.3.2__py3-none-any.whl → 1.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- magic_pdf/libs/version.py +1 -1
- magic_pdf/model/doc_analyze_by_custom_model.py +1 -1
- magic_pdf/pre_proc/ocr_detect_all_bboxes.py +2 -2
- {magic_pdf-1.3.2.dist-info → magic_pdf-1.3.3.dist-info}/METADATA +3 -3
- {magic_pdf-1.3.2.dist-info → magic_pdf-1.3.3.dist-info}/RECORD +9 -9
- {magic_pdf-1.3.2.dist-info → magic_pdf-1.3.3.dist-info}/LICENSE.md +0 -0
- {magic_pdf-1.3.2.dist-info → magic_pdf-1.3.3.dist-info}/WHEEL +0 -0
- {magic_pdf-1.3.2.dist-info → magic_pdf-1.3.3.dist-info}/entry_points.txt +0 -0
- {magic_pdf-1.3.2.dist-info → magic_pdf-1.3.3.dist-info}/top_level.txt +0 -0
magic_pdf/libs/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "1.3.2"
|
1
|
+
__version__ = "1.3.3"
|
@@ -147,7 +147,7 @@ def doc_analyze(
|
|
147
147
|
images.append(img_dict['img'])
|
148
148
|
page_wh_list.append((img_dict['width'], img_dict['height']))
|
149
149
|
|
150
|
-
images_with_extra_info = [(images[index], ocr, dataset._lang) for index in range(len(dataset))]
|
150
|
+
images_with_extra_info = [(images[index], ocr, dataset._lang) for index in range(len(images))]
|
151
151
|
|
152
152
|
if len(images) >= MIN_BATCH_INFERENCE_SIZE:
|
153
153
|
batch_size = MIN_BATCH_INFERENCE_SIZE
|
@@ -99,11 +99,11 @@ def ocr_prepare_bboxes_for_layout_split_v2(
|
|
99
99
|
all_discarded_blocks = []
|
100
100
|
add_bboxes(discarded_blocks, BlockType.Discarded, all_discarded_blocks)
|
101
101
|
|
102
|
-
"""footnote识别:宽度超过1/3页面宽度的,高度超过10的,处于页面下半50%区域的"""
|
102
|
+
"""footnote识别:宽度超过1/3页面宽度的,高度超过10的,处于页面下半30%区域的"""
|
103
103
|
footnote_blocks = []
|
104
104
|
for discarded in discarded_blocks:
|
105
105
|
x0, y0, x1, y1 = discarded['bbox']
|
106
|
-
if (x1 - x0) > (page_w / 3) and (y1 - y0) > 10 and y0 > (page_h / 2):
|
106
|
+
if (x1 - x0) > (page_w / 3) and (y1 - y0) > 10 and y0 > (page_h * 0.7):
|
107
107
|
footnote_blocks.append([x0, y0, x1, y1])
|
108
108
|
|
109
109
|
"""移除在footnote下面的任何框"""
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: magic-pdf
|
3
|
-
Version: 1.3.2
|
3
|
+
Version: 1.3.3
|
4
4
|
Summary: A practical tool for converting PDF to Markdown
|
5
5
|
License: AGPL-3.0
|
6
6
|
Project-URL: Home, https://mineru.net/
|
@@ -29,7 +29,7 @@ Requires-Dist: tqdm >=4.67.1
|
|
29
29
|
Requires-Dist: transformers !=4.51.0,<5.0.0,>=4.49.0
|
30
30
|
Provides-Extra: full
|
31
31
|
Requires-Dist: PyYAML <7,>=6.0.2 ; extra == 'full'
|
32
|
-
Requires-Dist: dill <1,>=0.3.
|
32
|
+
Requires-Dist: dill <1,>=0.3.8 ; extra == 'full'
|
33
33
|
Requires-Dist: doclayout-yolo ==0.0.2b1 ; extra == 'full'
|
34
34
|
Requires-Dist: ftfy <7,>=6.3.1 ; extra == 'full'
|
35
35
|
Requires-Dist: matplotlib <4,>=3.10 ; extra == 'full'
|
@@ -42,7 +42,7 @@ Requires-Dist: ultralytics <9,>=8.3.48 ; extra == 'full'
|
|
42
42
|
Provides-Extra: full_old_linux
|
43
43
|
Requires-Dist: PyYAML ==6.0.2 ; extra == 'full_old_linux'
|
44
44
|
Requires-Dist: albumentations ==1.4.20 ; extra == 'full_old_linux'
|
45
|
-
Requires-Dist: dill ==0.3.
|
45
|
+
Requires-Dist: dill ==0.3.8 ; extra == 'full_old_linux'
|
46
46
|
Requires-Dist: doclayout-yolo ==0.0.2b1 ; extra == 'full_old_linux'
|
47
47
|
Requires-Dist: ftfy ==6.3.1 ; extra == 'full_old_linux'
|
48
48
|
Requires-Dist: matplotlib <=3.10.1,>=3.10 ; extra == 'full_old_linux'
|
@@ -52,10 +52,10 @@ magic_pdf/libs/pdf_check.py,sha256=7GWWvDR6g_rj_fE6XJlbTq5AFVX11ngRIzT0N18F214,3
|
|
52
52
|
magic_pdf/libs/pdf_image_tools.py,sha256=_au7plmKKctpPKozBumSKgP8689q4vH1mU8VMLO0IbM,2260
|
53
53
|
magic_pdf/libs/performance_stats.py,sha256=DW-c6nUTUnWKGTONRKfpucsYZm1ake016F9K7jJwbik,2136
|
54
54
|
magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
|
55
|
-
magic_pdf/libs/version.py,sha256=
|
55
|
+
magic_pdf/libs/version.py,sha256=Vi6om3KImlKsS_Wg5CjUgYffoi2zx7T-SRPnnGL0G7M,22
|
56
56
|
magic_pdf/model/__init__.py,sha256=sa-dO2k-TLy25I2gRrzjm_cQeYfzMf-pLwBJHkIxGo0,51
|
57
57
|
magic_pdf/model/batch_analyze.py,sha256=yKhKQuZTh9GG83p61bw2BRqKMbnsjsmX73gfuTRk8xE,11272
|
58
|
-
magic_pdf/model/doc_analyze_by_custom_model.py,sha256
|
58
|
+
magic_pdf/model/doc_analyze_by_custom_model.py,sha256=-cjn7DQi6kZCqVZ0IxbXuL2kmeGhSVLzLaezIHPFzMU,10317
|
59
59
|
magic_pdf/model/magic_model.py,sha256=yZKWo_wRck_-YLyFGRiUHGar8sV1Y6458BFLbyBAt74,30682
|
60
60
|
magic_pdf/model/model_list.py,sha256=aqfEJlEfbib3D3ISrxc0Coh6SbffYh8Yq2FlQN35_zA,213
|
61
61
|
magic_pdf/model/pdf_extract_kit.py,sha256=C3sKqRkoD20Ldmo-cqGn1zRldEL-l5NYqcFvd05_fGU,10845
|
@@ -178,7 +178,7 @@ magic_pdf/post_proc/para_split_v3.py,sha256=SPN_VVGvFX5KpFMGw9OzgoE-kTZq-FF036i0
|
|
178
178
|
magic_pdf/pre_proc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
179
179
|
magic_pdf/pre_proc/construct_page_dict.py,sha256=OFmq5XRKi6fYIo-lmGlL-NB16Sf0egzsfEx-fT2uYrc,660
|
180
180
|
magic_pdf/pre_proc/cut_image.py,sha256=NDzbxwD7z7Tb4uAxL4KR6LzURFdN1Tzr4nPvj-VmEqc,1225
|
181
|
-
magic_pdf/pre_proc/ocr_detect_all_bboxes.py,sha256=
|
181
|
+
magic_pdf/pre_proc/ocr_detect_all_bboxes.py,sha256=3_bEbZC_BDwbuaBLPdCIbkxz93-g9oCtvjuXD8qbklo,9330
|
182
182
|
magic_pdf/pre_proc/ocr_dict_merge.py,sha256=PscKGF0uJIjMxZRM69FLUs1SZO_wOswDQQV1f0M2xAo,5627
|
183
183
|
magic_pdf/pre_proc/ocr_span_list_modify.py,sha256=bs5RLvk4kIyx9_Hqq0FU3AGPPxE8Sxs97Uwlf1sBryM,4725
|
184
184
|
magic_pdf/pre_proc/remove_bbox_overlap.py,sha256=mcdxAh4P56NZ3Ij8h3vW8qC_SrszfXflVWuWUuUiTNg,3089
|
@@ -195,9 +195,9 @@ magic_pdf/tools/common.py,sha256=-x0RSFr7SNbdYq7DntaLYmQmaxyF-xKSf4xMpSUTzA0,126
|
|
195
195
|
magic_pdf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
196
196
|
magic_pdf/utils/annotations.py,sha256=82ou3uELNbQWa9hOFFkVt0gsIskAKf5msCv5J2IJ5V0,211
|
197
197
|
magic_pdf/utils/office_to_pdf.py,sha256=7aj-Ls2v8saD-Rgu_t3FIc-J3Ka9wnmiEH5zY-H1Vxs,729
|
198
|
-
magic_pdf-1.3.
|
199
|
-
magic_pdf-1.3.
|
200
|
-
magic_pdf-1.3.
|
201
|
-
magic_pdf-1.3.
|
202
|
-
magic_pdf-1.3.
|
203
|
-
magic_pdf-1.3.
|
198
|
+
magic_pdf-1.3.3.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
|
199
|
+
magic_pdf-1.3.3.dist-info/METADATA,sha256=1Y-a4UouLQRhsldrhz6UZLlx4KUFOdjSk5R1gK_oYjs,45615
|
200
|
+
magic_pdf-1.3.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
201
|
+
magic_pdf-1.3.3.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
|
202
|
+
magic_pdf-1.3.3.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
|
203
|
+
magic_pdf-1.3.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|