magic-pdf 1.3.2__py3-none-any.whl → 1.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
magic_pdf/libs/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.3.2"
1
+ __version__ = "1.3.3"
magic_pdf/model/doc_analyze_by_custom_model.py CHANGED
@@ -147,7 +147,7 @@ def doc_analyze(
147
147
  images.append(img_dict['img'])
148
148
  page_wh_list.append((img_dict['width'], img_dict['height']))
149
149
 
150
- images_with_extra_info = [(images[index], ocr, dataset._lang) for index in range(len(dataset))]
150
+ images_with_extra_info = [(images[index], ocr, dataset._lang) for index in range(len(images))]
151
151
 
152
152
  if len(images) >= MIN_BATCH_INFERENCE_SIZE:
153
153
  batch_size = MIN_BATCH_INFERENCE_SIZE
magic_pdf/pre_proc/ocr_detect_all_bboxes.py CHANGED
@@ -99,11 +99,11 @@ def ocr_prepare_bboxes_for_layout_split_v2(
99
99
  all_discarded_blocks = []
100
100
  add_bboxes(discarded_blocks, BlockType.Discarded, all_discarded_blocks)
101
101
 
102
- """footnote识别:宽度超过1/3页面宽度的,高度超过10的,处于页面下半50%区域的"""
102
+ """footnote识别:宽度超过1/3页面宽度的,高度超过10的,处于页面下半30%区域的"""
103
103
  footnote_blocks = []
104
104
  for discarded in discarded_blocks:
105
105
  x0, y0, x1, y1 = discarded['bbox']
106
- if (x1 - x0) > (page_w / 3) and (y1 - y0) > 10 and y0 > (page_h / 2):
106
+ if (x1 - x0) > (page_w / 3) and (y1 - y0) > 10 and y0 > (page_h * 0.7):
107
107
  footnote_blocks.append([x0, y0, x1, y1])
108
108
 
109
109
  """移除在footnote下面的任何框"""
magic_pdf-1.3.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: magic-pdf
3
- Version: 1.3.2
3
+ Version: 1.3.3
4
4
  Summary: A practical tool for converting PDF to Markdown
5
5
  License: AGPL-3.0
6
6
  Project-URL: Home, https://mineru.net/
@@ -29,7 +29,7 @@ Requires-Dist: tqdm >=4.67.1
29
29
  Requires-Dist: transformers !=4.51.0,<5.0.0,>=4.49.0
30
30
  Provides-Extra: full
31
31
  Requires-Dist: PyYAML <7,>=6.0.2 ; extra == 'full'
32
- Requires-Dist: dill <1,>=0.3.9 ; extra == 'full'
32
+ Requires-Dist: dill <1,>=0.3.8 ; extra == 'full'
33
33
  Requires-Dist: doclayout-yolo ==0.0.2b1 ; extra == 'full'
34
34
  Requires-Dist: ftfy <7,>=6.3.1 ; extra == 'full'
35
35
  Requires-Dist: matplotlib <4,>=3.10 ; extra == 'full'
@@ -42,7 +42,7 @@ Requires-Dist: ultralytics <9,>=8.3.48 ; extra == 'full'
42
42
  Provides-Extra: full_old_linux
43
43
  Requires-Dist: PyYAML ==6.0.2 ; extra == 'full_old_linux'
44
44
  Requires-Dist: albumentations ==1.4.20 ; extra == 'full_old_linux'
45
- Requires-Dist: dill ==0.3.9 ; extra == 'full_old_linux'
45
+ Requires-Dist: dill ==0.3.8 ; extra == 'full_old_linux'
46
46
  Requires-Dist: doclayout-yolo ==0.0.2b1 ; extra == 'full_old_linux'
47
47
  Requires-Dist: ftfy ==6.3.1 ; extra == 'full_old_linux'
48
48
  Requires-Dist: matplotlib <=3.10.1,>=3.10 ; extra == 'full_old_linux'
magic_pdf-1.3.3.dist-info/RECORD CHANGED
@@ -52,10 +52,10 @@ magic_pdf/libs/pdf_check.py,sha256=7GWWvDR6g_rj_fE6XJlbTq5AFVX11ngRIzT0N18F214,3
52
52
  magic_pdf/libs/pdf_image_tools.py,sha256=_au7plmKKctpPKozBumSKgP8689q4vH1mU8VMLO0IbM,2260
53
53
  magic_pdf/libs/performance_stats.py,sha256=DW-c6nUTUnWKGTONRKfpucsYZm1ake016F9K7jJwbik,2136
54
54
  magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
55
- magic_pdf/libs/version.py,sha256=HgKA3RqZvC7slo8MgLyffCGwJbQ3cY6I7oUMFvGLWps,22
55
+ magic_pdf/libs/version.py,sha256=Vi6om3KImlKsS_Wg5CjUgYffoi2zx7T-SRPnnGL0G7M,22
56
56
  magic_pdf/model/__init__.py,sha256=sa-dO2k-TLy25I2gRrzjm_cQeYfzMf-pLwBJHkIxGo0,51
57
57
  magic_pdf/model/batch_analyze.py,sha256=yKhKQuZTh9GG83p61bw2BRqKMbnsjsmX73gfuTRk8xE,11272
58
- magic_pdf/model/doc_analyze_by_custom_model.py,sha256=R9oowGvH3tN1knoSHiyECKsoW3RuKZ1y5cJd42FNurE,10318
58
+ magic_pdf/model/doc_analyze_by_custom_model.py,sha256=-cjn7DQi6kZCqVZ0IxbXuL2kmeGhSVLzLaezIHPFzMU,10317
59
59
  magic_pdf/model/magic_model.py,sha256=yZKWo_wRck_-YLyFGRiUHGar8sV1Y6458BFLbyBAt74,30682
60
60
  magic_pdf/model/model_list.py,sha256=aqfEJlEfbib3D3ISrxc0Coh6SbffYh8Yq2FlQN35_zA,213
61
61
  magic_pdf/model/pdf_extract_kit.py,sha256=C3sKqRkoD20Ldmo-cqGn1zRldEL-l5NYqcFvd05_fGU,10845
@@ -178,7 +178,7 @@ magic_pdf/post_proc/para_split_v3.py,sha256=SPN_VVGvFX5KpFMGw9OzgoE-kTZq-FF036i0
178
178
  magic_pdf/pre_proc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
179
179
  magic_pdf/pre_proc/construct_page_dict.py,sha256=OFmq5XRKi6fYIo-lmGlL-NB16Sf0egzsfEx-fT2uYrc,660
180
180
  magic_pdf/pre_proc/cut_image.py,sha256=NDzbxwD7z7Tb4uAxL4KR6LzURFdN1Tzr4nPvj-VmEqc,1225
181
- magic_pdf/pre_proc/ocr_detect_all_bboxes.py,sha256=nt88ttXCEI_1ihAF7HU15SQjwM69V-iJmk-L_nyzA6o,9328
181
+ magic_pdf/pre_proc/ocr_detect_all_bboxes.py,sha256=3_bEbZC_BDwbuaBLPdCIbkxz93-g9oCtvjuXD8qbklo,9330
182
182
  magic_pdf/pre_proc/ocr_dict_merge.py,sha256=PscKGF0uJIjMxZRM69FLUs1SZO_wOswDQQV1f0M2xAo,5627
183
183
  magic_pdf/pre_proc/ocr_span_list_modify.py,sha256=bs5RLvk4kIyx9_Hqq0FU3AGPPxE8Sxs97Uwlf1sBryM,4725
184
184
  magic_pdf/pre_proc/remove_bbox_overlap.py,sha256=mcdxAh4P56NZ3Ij8h3vW8qC_SrszfXflVWuWUuUiTNg,3089
@@ -195,9 +195,9 @@ magic_pdf/tools/common.py,sha256=-x0RSFr7SNbdYq7DntaLYmQmaxyF-xKSf4xMpSUTzA0,126
195
195
  magic_pdf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
196
196
  magic_pdf/utils/annotations.py,sha256=82ou3uELNbQWa9hOFFkVt0gsIskAKf5msCv5J2IJ5V0,211
197
197
  magic_pdf/utils/office_to_pdf.py,sha256=7aj-Ls2v8saD-Rgu_t3FIc-J3Ka9wnmiEH5zY-H1Vxs,729
198
- magic_pdf-1.3.2.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
199
- magic_pdf-1.3.2.dist-info/METADATA,sha256=R56SxjE08VgwAabtD81lCHxjH0hho1-c3-Zeb_zkUjo,45615
200
- magic_pdf-1.3.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
201
- magic_pdf-1.3.2.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
202
- magic_pdf-1.3.2.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
203
- magic_pdf-1.3.2.dist-info/RECORD,,
198
+ magic_pdf-1.3.3.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
199
+ magic_pdf-1.3.3.dist-info/METADATA,sha256=1Y-a4UouLQRhsldrhz6UZLlx4KUFOdjSk5R1gK_oYjs,45615
200
+ magic_pdf-1.3.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
201
+ magic_pdf-1.3.3.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
202
+ magic_pdf-1.3.3.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
203
+ magic_pdf-1.3.3.dist-info/RECORD,,