magic-pdf 0.5.13__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. magic_pdf/cli/magicpdf.py +18 -7
  2. magic_pdf/libs/config_reader.py +10 -0
  3. magic_pdf/libs/version.py +1 -1
  4. magic_pdf/model/__init__.py +1 -0
  5. magic_pdf/model/doc_analyze_by_custom_model.py +38 -15
  6. magic_pdf/model/model_list.py +1 -0
  7. magic_pdf/model/pdf_extract_kit.py +196 -0
  8. magic_pdf/model/pek_sub_modules/__init__.py +0 -0
  9. magic_pdf/model/pek_sub_modules/layoutlmv3/__init__.py +0 -0
  10. magic_pdf/model/pek_sub_modules/layoutlmv3/backbone.py +179 -0
  11. magic_pdf/model/pek_sub_modules/layoutlmv3/beit.py +671 -0
  12. magic_pdf/model/pek_sub_modules/layoutlmv3/deit.py +476 -0
  13. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/__init__.py +7 -0
  14. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/__init__.py +2 -0
  15. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/cord.py +171 -0
  16. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/data_collator.py +124 -0
  17. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/funsd.py +136 -0
  18. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/image_utils.py +284 -0
  19. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/xfund.py +213 -0
  20. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/__init__.py +7 -0
  21. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__init__.py +24 -0
  22. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/configuration_layoutlmv3.py +60 -0
  23. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/modeling_layoutlmv3.py +1282 -0
  24. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py +32 -0
  25. magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py +34 -0
  26. magic_pdf/model/pek_sub_modules/layoutlmv3/model_init.py +150 -0
  27. magic_pdf/model/pek_sub_modules/layoutlmv3/rcnn_vl.py +163 -0
  28. magic_pdf/model/pek_sub_modules/layoutlmv3/visualizer.py +1236 -0
  29. magic_pdf/model/pek_sub_modules/post_process.py +36 -0
  30. magic_pdf/model/pek_sub_modules/self_modify.py +260 -0
  31. magic_pdf/model/pp_structure_v2.py +7 -0
  32. magic_pdf/pipe/AbsPipe.py +8 -14
  33. magic_pdf/pipe/OCRPipe.py +12 -8
  34. magic_pdf/pipe/TXTPipe.py +12 -8
  35. magic_pdf/pipe/UNIPipe.py +9 -7
  36. magic_pdf/resources/model_config/UniMERNet/demo.yaml +46 -0
  37. magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml +351 -0
  38. magic_pdf/resources/model_config/model_configs.yaml +9 -0
  39. {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/METADATA +18 -8
  40. {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/RECORD +44 -18
  41. magic_pdf/model/360_layout_analysis.py +0 -8
  42. {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/LICENSE.md +0 -0
  43. {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/WHEEL +0 -0
  44. {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/entry_points.txt +0 -0
  45. {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,351 @@
1
+ AUG:
2
+ DETR: true
3
+ CACHE_DIR: /mnt/localdata/users/yupanhuang/cache/huggingface
4
+ CUDNN_BENCHMARK: false
5
+ DATALOADER:
6
+ ASPECT_RATIO_GROUPING: true
7
+ FILTER_EMPTY_ANNOTATIONS: false
8
+ NUM_WORKERS: 4
9
+ REPEAT_THRESHOLD: 0.0
10
+ SAMPLER_TRAIN: TrainingSampler
11
+ DATASETS:
12
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
13
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
14
+ PROPOSAL_FILES_TEST: []
15
+ PROPOSAL_FILES_TRAIN: []
16
+ TEST:
17
+ - scihub_train
18
+ TRAIN:
19
+ - scihub_train
20
+ GLOBAL:
21
+ HACK: 1.0
22
+ ICDAR_DATA_DIR_TEST: ''
23
+ ICDAR_DATA_DIR_TRAIN: ''
24
+ INPUT:
25
+ CROP:
26
+ ENABLED: true
27
+ SIZE:
28
+ - 384
29
+ - 600
30
+ TYPE: absolute_range
31
+ FORMAT: RGB
32
+ MASK_FORMAT: polygon
33
+ MAX_SIZE_TEST: 1333
34
+ MAX_SIZE_TRAIN: 1333
35
+ MIN_SIZE_TEST: 800
36
+ MIN_SIZE_TRAIN:
37
+ - 480
38
+ - 512
39
+ - 544
40
+ - 576
41
+ - 608
42
+ - 640
43
+ - 672
44
+ - 704
45
+ - 736
46
+ - 768
47
+ - 800
48
+ MIN_SIZE_TRAIN_SAMPLING: choice
49
+ RANDOM_FLIP: horizontal
50
+ MODEL:
51
+ ANCHOR_GENERATOR:
52
+ ANGLES:
53
+ - - -90
54
+ - 0
55
+ - 90
56
+ ASPECT_RATIOS:
57
+ - - 0.5
58
+ - 1.0
59
+ - 2.0
60
+ NAME: DefaultAnchorGenerator
61
+ OFFSET: 0.0
62
+ SIZES:
63
+ - - 32
64
+ - - 64
65
+ - - 128
66
+ - - 256
67
+ - - 512
68
+ BACKBONE:
69
+ FREEZE_AT: 2
70
+ NAME: build_vit_fpn_backbone
71
+ CONFIG_PATH: ''
72
+ DEVICE: cuda
73
+ FPN:
74
+ FUSE_TYPE: sum
75
+ IN_FEATURES:
76
+ - layer3
77
+ - layer5
78
+ - layer7
79
+ - layer11
80
+ NORM: ''
81
+ OUT_CHANNELS: 256
82
+ IMAGE_ONLY: true
83
+ KEYPOINT_ON: false
84
+ LOAD_PROPOSALS: false
85
+ MASK_ON: true
86
+ META_ARCHITECTURE: VLGeneralizedRCNN
87
+ PANOPTIC_FPN:
88
+ COMBINE:
89
+ ENABLED: true
90
+ INSTANCES_CONFIDENCE_THRESH: 0.5
91
+ OVERLAP_THRESH: 0.5
92
+ STUFF_AREA_LIMIT: 4096
93
+ INSTANCE_LOSS_WEIGHT: 1.0
94
+ PIXEL_MEAN:
95
+ - 127.5
96
+ - 127.5
97
+ - 127.5
98
+ PIXEL_STD:
99
+ - 127.5
100
+ - 127.5
101
+ - 127.5
102
+ PROPOSAL_GENERATOR:
103
+ MIN_SIZE: 0
104
+ NAME: RPN
105
+ RESNETS:
106
+ DEFORM_MODULATED: false
107
+ DEFORM_NUM_GROUPS: 1
108
+ DEFORM_ON_PER_STAGE:
109
+ - false
110
+ - false
111
+ - false
112
+ - false
113
+ DEPTH: 50
114
+ NORM: FrozenBN
115
+ NUM_GROUPS: 1
116
+ OUT_FEATURES:
117
+ - res4
118
+ RES2_OUT_CHANNELS: 256
119
+ RES5_DILATION: 1
120
+ STEM_OUT_CHANNELS: 64
121
+ STRIDE_IN_1X1: true
122
+ WIDTH_PER_GROUP: 64
123
+ RETINANET:
124
+ BBOX_REG_LOSS_TYPE: smooth_l1
125
+ BBOX_REG_WEIGHTS:
126
+ - 1.0
127
+ - 1.0
128
+ - 1.0
129
+ - 1.0
130
+ FOCAL_LOSS_ALPHA: 0.25
131
+ FOCAL_LOSS_GAMMA: 2.0
132
+ IN_FEATURES:
133
+ - p3
134
+ - p4
135
+ - p5
136
+ - p6
137
+ - p7
138
+ IOU_LABELS:
139
+ - 0
140
+ - -1
141
+ - 1
142
+ IOU_THRESHOLDS:
143
+ - 0.4
144
+ - 0.5
145
+ NMS_THRESH_TEST: 0.5
146
+ NORM: ''
147
+ NUM_CLASSES: 10
148
+ NUM_CONVS: 4
149
+ PRIOR_PROB: 0.01
150
+ SCORE_THRESH_TEST: 0.05
151
+ SMOOTH_L1_LOSS_BETA: 0.1
152
+ TOPK_CANDIDATES_TEST: 1000
153
+ ROI_BOX_CASCADE_HEAD:
154
+ BBOX_REG_WEIGHTS:
155
+ - - 10.0
156
+ - 10.0
157
+ - 5.0
158
+ - 5.0
159
+ - - 20.0
160
+ - 20.0
161
+ - 10.0
162
+ - 10.0
163
+ - - 30.0
164
+ - 30.0
165
+ - 15.0
166
+ - 15.0
167
+ IOUS:
168
+ - 0.5
169
+ - 0.6
170
+ - 0.7
171
+ ROI_BOX_HEAD:
172
+ BBOX_REG_LOSS_TYPE: smooth_l1
173
+ BBOX_REG_LOSS_WEIGHT: 1.0
174
+ BBOX_REG_WEIGHTS:
175
+ - 10.0
176
+ - 10.0
177
+ - 5.0
178
+ - 5.0
179
+ CLS_AGNOSTIC_BBOX_REG: true
180
+ CONV_DIM: 256
181
+ FC_DIM: 1024
182
+ NAME: FastRCNNConvFCHead
183
+ NORM: ''
184
+ NUM_CONV: 0
185
+ NUM_FC: 2
186
+ POOLER_RESOLUTION: 7
187
+ POOLER_SAMPLING_RATIO: 0
188
+ POOLER_TYPE: ROIAlignV2
189
+ SMOOTH_L1_BETA: 0.0
190
+ TRAIN_ON_PRED_BOXES: false
191
+ ROI_HEADS:
192
+ BATCH_SIZE_PER_IMAGE: 512
193
+ IN_FEATURES:
194
+ - p2
195
+ - p3
196
+ - p4
197
+ - p5
198
+ IOU_LABELS:
199
+ - 0
200
+ - 1
201
+ IOU_THRESHOLDS:
202
+ - 0.5
203
+ NAME: CascadeROIHeads
204
+ NMS_THRESH_TEST: 0.5
205
+ NUM_CLASSES: 10
206
+ POSITIVE_FRACTION: 0.25
207
+ PROPOSAL_APPEND_GT: true
208
+ SCORE_THRESH_TEST: 0.05
209
+ ROI_KEYPOINT_HEAD:
210
+ CONV_DIMS:
211
+ - 512
212
+ - 512
213
+ - 512
214
+ - 512
215
+ - 512
216
+ - 512
217
+ - 512
218
+ - 512
219
+ LOSS_WEIGHT: 1.0
220
+ MIN_KEYPOINTS_PER_IMAGE: 1
221
+ NAME: KRCNNConvDeconvUpsampleHead
222
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
223
+ NUM_KEYPOINTS: 17
224
+ POOLER_RESOLUTION: 14
225
+ POOLER_SAMPLING_RATIO: 0
226
+ POOLER_TYPE: ROIAlignV2
227
+ ROI_MASK_HEAD:
228
+ CLS_AGNOSTIC_MASK: false
229
+ CONV_DIM: 256
230
+ NAME: MaskRCNNConvUpsampleHead
231
+ NORM: ''
232
+ NUM_CONV: 4
233
+ POOLER_RESOLUTION: 14
234
+ POOLER_SAMPLING_RATIO: 0
235
+ POOLER_TYPE: ROIAlignV2
236
+ RPN:
237
+ BATCH_SIZE_PER_IMAGE: 256
238
+ BBOX_REG_LOSS_TYPE: smooth_l1
239
+ BBOX_REG_LOSS_WEIGHT: 1.0
240
+ BBOX_REG_WEIGHTS:
241
+ - 1.0
242
+ - 1.0
243
+ - 1.0
244
+ - 1.0
245
+ BOUNDARY_THRESH: -1
246
+ CONV_DIMS:
247
+ - -1
248
+ HEAD_NAME: StandardRPNHead
249
+ IN_FEATURES:
250
+ - p2
251
+ - p3
252
+ - p4
253
+ - p5
254
+ - p6
255
+ IOU_LABELS:
256
+ - 0
257
+ - -1
258
+ - 1
259
+ IOU_THRESHOLDS:
260
+ - 0.3
261
+ - 0.7
262
+ LOSS_WEIGHT: 1.0
263
+ NMS_THRESH: 0.7
264
+ POSITIVE_FRACTION: 0.5
265
+ POST_NMS_TOPK_TEST: 1000
266
+ POST_NMS_TOPK_TRAIN: 2000
267
+ PRE_NMS_TOPK_TEST: 1000
268
+ PRE_NMS_TOPK_TRAIN: 2000
269
+ SMOOTH_L1_BETA: 0.0
270
+ SEM_SEG_HEAD:
271
+ COMMON_STRIDE: 4
272
+ CONVS_DIM: 128
273
+ IGNORE_VALUE: 255
274
+ IN_FEATURES:
275
+ - p2
276
+ - p3
277
+ - p4
278
+ - p5
279
+ LOSS_WEIGHT: 1.0
280
+ NAME: SemSegFPNHead
281
+ NORM: GN
282
+ NUM_CLASSES: 10
283
+ VIT:
284
+ DROP_PATH: 0.1
285
+ IMG_SIZE:
286
+ - 224
287
+ - 224
288
+ NAME: layoutlmv3_base
289
+ OUT_FEATURES:
290
+ - layer3
291
+ - layer5
292
+ - layer7
293
+ - layer11
294
+ POS_TYPE: abs
295
+ WEIGHTS:
296
+ OUTPUT_DIR:
297
+ SCIHUB_DATA_DIR_TRAIN: /mnt/petrelfs/share_data/zhaozhiyuan/publaynet/layout_scihub/train
298
+ SEED: 42
299
+ SOLVER:
300
+ AMP:
301
+ ENABLED: true
302
+ BACKBONE_MULTIPLIER: 1.0
303
+ BASE_LR: 0.0002
304
+ BIAS_LR_FACTOR: 1.0
305
+ CHECKPOINT_PERIOD: 2000
306
+ CLIP_GRADIENTS:
307
+ CLIP_TYPE: full_model
308
+ CLIP_VALUE: 1.0
309
+ ENABLED: true
310
+ NORM_TYPE: 2.0
311
+ GAMMA: 0.1
312
+ GRADIENT_ACCUMULATION_STEPS: 1
313
+ IMS_PER_BATCH: 32
314
+ LR_SCHEDULER_NAME: WarmupCosineLR
315
+ MAX_ITER: 20000
316
+ MOMENTUM: 0.9
317
+ NESTEROV: false
318
+ OPTIMIZER: ADAMW
319
+ REFERENCE_WORLD_SIZE: 0
320
+ STEPS:
321
+ - 10000
322
+ WARMUP_FACTOR: 0.01
323
+ WARMUP_ITERS: 333
324
+ WARMUP_METHOD: linear
325
+ WEIGHT_DECAY: 0.05
326
+ WEIGHT_DECAY_BIAS: null
327
+ WEIGHT_DECAY_NORM: 0.0
328
+ TEST:
329
+ AUG:
330
+ ENABLED: false
331
+ FLIP: true
332
+ MAX_SIZE: 4000
333
+ MIN_SIZES:
334
+ - 400
335
+ - 500
336
+ - 600
337
+ - 700
338
+ - 800
339
+ - 900
340
+ - 1000
341
+ - 1100
342
+ - 1200
343
+ DETECTIONS_PER_IMAGE: 100
344
+ EVAL_PERIOD: 1000
345
+ EXPECTED_RESULTS: []
346
+ KEYPOINT_OKS_SIGMAS: []
347
+ PRECISE_BN:
348
+ ENABLED: false
349
+ NUM_ITER: 200
350
+ VERSION: 2
351
+ VIS_PERIOD: 0
@@ -0,0 +1,9 @@
1
+ config:
2
+ device: cpu
3
+ layout: True
4
+ formula: True
5
+
6
+ weights:
7
+ layout: Layout/model_final.pth
8
+ mfd: MFD/weights.pt
9
+ mfr: MFR/UniMERNet
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: magic-pdf
3
- Version: 0.5.13
3
+ Version: 0.6.0
4
4
  Summary: A practical tool for converting PDF to Markdown
5
- Home-page: https://github.com/magicpdf/Magic-PDF
5
+ Home-page: https://github.com/opendatalab/MinerU
6
6
  Requires-Python: >=3.9
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE.md
@@ -12,16 +12,21 @@ Requires-Dist: click >=8.1.7
12
12
  Requires-Dist: PyMuPDF >=1.24.7
13
13
  Requires-Dist: loguru >=0.6.0
14
14
  Requires-Dist: numpy >=1.21.6
15
- Requires-Dist: fast-langdetect >=0.1.1
15
+ Requires-Dist: fast-langdetect >=0.2.1
16
16
  Requires-Dist: wordninja >=2.0.0
17
17
  Requires-Dist: scikit-learn >=1.0.2
18
18
  Requires-Dist: pdfminer.six >=20231228
19
- Requires-Dist: numpy <2.0.0
20
19
  Provides-Extra: cpu
21
- Requires-Dist: paddleocr ; extra == 'cpu'
20
+ Requires-Dist: paddleocr ==2.7.3 ; extra == 'cpu'
22
21
  Requires-Dist: paddlepaddle ; extra == 'cpu'
22
+ Provides-Extra: full-cpu
23
+ Requires-Dist: unimernet ; extra == 'full-cpu'
24
+ Requires-Dist: matplotlib ; extra == 'full-cpu'
25
+ Requires-Dist: ultralytics ; extra == 'full-cpu'
26
+ Requires-Dist: paddleocr ==2.7.3 ; extra == 'full-cpu'
27
+ Requires-Dist: paddlepaddle ; extra == 'full-cpu'
23
28
  Provides-Extra: gpu
24
- Requires-Dist: paddleocr ; extra == 'gpu'
29
+ Requires-Dist: paddleocr ==2.7.3 ; extra == 'gpu'
25
30
  Requires-Dist: paddlepaddle-gpu ; extra == 'gpu'
26
31
 
27
32
  <div id="top"></div>
@@ -29,9 +34,14 @@ Requires-Dist: paddlepaddle-gpu ; extra == 'gpu'
29
34
 
30
35
  [![stars](https://img.shields.io/github/stars/opendatalab/MinerU.svg)](https://github.com/opendatalab/MinerU)
31
36
  [![forks](https://img.shields.io/github/forks/opendatalab/MinerU.svg)](https://github.com/opendatalab/MinerU)
32
- [![license](https://img.shields.io/github/license/opendatalab/MinerU.svg)](https://github.com/opendatalab/MinerU/tree/main/LICENSE)
33
- [![issue resolution](https://img.shields.io/github/issues-closed-raw/opendatalab/MinerU)](https://github.com/opendatalab/MinerU/issues)
34
37
  [![open issues](https://img.shields.io/github/issues-raw/opendatalab/MinerU)](https://github.com/opendatalab/MinerU/issues)
38
+ [![issue resolution](https://img.shields.io/github/issues-closed-raw/opendatalab/MinerU)](https://github.com/opendatalab/MinerU/issues)
39
+ [![PyPI version](https://badge.fury.io/py/magic-pdf.svg)](https://badge.fury.io/py/magic-pdf)
40
+ [![Downloads](https://static.pepy.tech/badge/magic-pdf)](https://pepy.tech/project/magic-pdf)
41
+ [![Downloads](https://static.pepy.tech/badge/magic-pdf/month)](https://pepy.tech/project/magic-pdf)
42
+
43
+
44
+
35
45
 
36
46
  [English](README.md) | [简体中文](README_zh-CN.md)
37
47
 
@@ -5,7 +5,7 @@ magic_pdf/pdf_parse_for_train.py,sha256=Oby61DMjJ716Jj_ri7lwXfv2Chus0pbBR2RPXrmB
5
5
  magic_pdf/pdf_parse_union_core.py,sha256=a67iQuEfuslAEF-wQplGZKXUuz5mT3HiCyvuR52E6Gw,10584
6
6
  magic_pdf/user_api.py,sha256=CVQH-VSiZpz0bSkyMT4czk1epZriIPSJsLsPbluPa9Q,3054
7
7
  magic_pdf/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- magic_pdf/cli/magicpdf.py,sha256=d4Wy2g7t_GsclV4r0vQR0enIh08-Ml2n1jf1zdrq4LE,11852
8
+ magic_pdf/cli/magicpdf.py,sha256=EcTiX-MaiDc4Fv9qZ_UdjHt5tYnBEu6vlbp0w030sA0,12691
9
9
  magic_pdf/dict2md/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  magic_pdf/dict2md/mkcontent.py,sha256=rWUY-2opd0jeowEUEVOV_uWcKum1Q7ng4nOoT6-ka_s,17459
11
11
  magic_pdf/dict2md/ocr_mkcontent.py,sha256=RyxebPtvFfNce_HCa-_YGxwFx_srzL-BfMKc85V9JG0,15442
@@ -25,7 +25,7 @@ magic_pdf/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  magic_pdf/libs/boxbase.py,sha256=MvD0DypR4sTEF3T2RrI_yJ8mPDUBYHAqAaau2mnBSxY,15343
26
26
  magic_pdf/libs/calc_span_stats.py,sha256=5vnU27DcbkFDRSAoLqAmX0KQ3I9ehWkEgh_t9hxg_zI,10147
27
27
  magic_pdf/libs/commons.py,sha256=6Zu9-OyamyCNDY7qj0SxR-rux-ggj9im3CVPtC4ubB8,7108
28
- magic_pdf/libs/config_reader.py,sha256=ADe9DknbSBb3-JQlQJix-fkVDPIQCkytl4mKdXnIraA,1607
28
+ magic_pdf/libs/config_reader.py,sha256=wB0Zn6qEwuAWmv2Icz9owPIqxrhFEH5i6sUr8Nt5ULo,1806
29
29
  magic_pdf/libs/convert_utils.py,sha256=Ov-lsfCLBPz_15iSJXIslBNmrSf_E_1g_XDWJy8NgO8,143
30
30
  magic_pdf/libs/coordinate_transform.py,sha256=Bbop2cP2uz2ZG0U0gwd7J6EKkgABq5Rv03qf2LMPw80,429
31
31
  magic_pdf/libs/detect_language_from_model.py,sha256=Uln8F9qs8EJOw4EgI7KRlaU3lD_mK8KMTlADLFtz8fk,816
@@ -44,14 +44,37 @@ magic_pdf/libs/pdf_check.py,sha256=MAe8wzwT0qvPf_I72wEZG7k1g4haNHS7oUtLqkB5rlE,2
44
44
  magic_pdf/libs/pdf_image_tools.py,sha256=CAd01giTKr_UJz1_QtDOARG9G9z69GFpzRZwcWSfLtE,1282
45
45
  magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
46
46
  magic_pdf/libs/textbase.py,sha256=SC1Frhz3Fb7V7n2SFRBsl7Bmg0JZdlvZskq0lfW1vIk,732
47
- magic_pdf/libs/version.py,sha256=jEM-pQV3SLNuNue5fxlBM8hWNuJydsyqi_WBzC1VQaM,23
47
+ magic_pdf/libs/version.py,sha256=cID1jLnC_vj48GgMN6Yb1FA3JsQ95zNmCHmRYE8TFhY,22
48
48
  magic_pdf/libs/vis_utils.py,sha256=hTOTEakKV0pGMbk0tbRkVI_tku7A3dGc96ynObZ4kwI,10207
49
- magic_pdf/model/360_layout_analysis.py,sha256=GbchKPJRVcrxvwNXMnR4vt8lOLPauTWMl-43ayyhX7U,221
50
- magic_pdf/model/__init__.py,sha256=X6t9kPDqM8hDCbq8fQc_8jILtG6mepDjN_kadUo39Sk,29
51
- magic_pdf/model/doc_analyze_by_custom_model.py,sha256=8z4NX7Lk7CcPl1BQiNYL6dDiP63M3f6m3dmW6rjHCqg,2370
49
+ magic_pdf/model/__init__.py,sha256=XeYcF4RMZ3DosyLqiz0_n1JVa2k5RhTwUXwKt5sAjEQ,53
50
+ magic_pdf/model/doc_analyze_by_custom_model.py,sha256=kssz_Nn6zTYED_iEgGuFRjus947xoK5dTqj88FOehE0,3256
52
51
  magic_pdf/model/magic_model.py,sha256=2H6Gz1mg0f0YCvz-TLIWrAWXCQLgZftBXJNRPlSIjwc,25077
53
- magic_pdf/model/model_list.py,sha256=dNfnDodnbkgIW0PFDjn_KsQMA8DODOzo4Z4jxfOilaA,44
54
- magic_pdf/model/pp_structure_v2.py,sha256=fFbAOYEcLXlkCjqZ3yxZXR7nqtp6V8yowyjSibW3lhY,2635
52
+ magic_pdf/model/model_list.py,sha256=AqxAtKGLDn7VVXWYwk0l9LnACxDLyU2jwOJ7vjPZj04,72
53
+ magic_pdf/model/pdf_extract_kit.py,sha256=hiK1zDrwn5QhqUwI7BvM1JOoq_JIab4uVx_flHrBmWE,8374
54
+ magic_pdf/model/pp_structure_v2.py,sha256=apYWwWiCjlks5CLXolcynnuPV7llCm2PdP-6tg0-Kt0,2903
55
+ magic_pdf/model/pek_sub_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
+ magic_pdf/model/pek_sub_modules/post_process.py,sha256=HzRxV2sVR3Qo8XKYEHhT6tae-bYTb6dnAfGP6gfVNaM,1135
57
+ magic_pdf/model/pek_sub_modules/self_modify.py,sha256=XiwLUCiY_E0JkaIQr5m1hOD75-iGrgkMVe-1bzeF_Go,10522
58
+ magic_pdf/model/pek_sub_modules/layoutlmv3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
+ magic_pdf/model/pek_sub_modules/layoutlmv3/backbone.py,sha256=1cvSCczgvwOLdvzWyqttoYPMHsXmnzI3w9abJ1bAXoM,7106
60
+ magic_pdf/model/pek_sub_modules/layoutlmv3/beit.py,sha256=e-INve6bpEx_0FM5wYbQcEcelc79tzDlCljTVHaGt1w,30450
61
+ magic_pdf/model/pek_sub_modules/layoutlmv3/deit.py,sha256=Qyn5UWutZ-0GJczexCh-oMMSXtav_g3ovumMFJp8Om4,17000
62
+ magic_pdf/model/pek_sub_modules/layoutlmv3/model_init.py,sha256=POs5s4_9rS-GlE7f_iHBuZpTwOuyfI6VE3DUb37fgxA,4483
63
+ magic_pdf/model/pek_sub_modules/layoutlmv3/rcnn_vl.py,sha256=nI4G6AeLRmjavNhs5S2USKh0ozn-ftMuW0F0m_eVy3c,6649
64
+ magic_pdf/model/pek_sub_modules/layoutlmv3/visualizer.py,sha256=H6UYeCCbaN2gbDjGthTkKkPoyWxfE3azRjsR7fVBwnw,49797
65
+ magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/__init__.py,sha256=C4N9gXJr7is7uznvQefQ7dOhlzEhdp86Lgh-7p0Y-08,186
66
+ magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/__init__.py,sha256=W7V62JOh12NdMZj2H1sde3Il0AqW2VKplmHEsLle6tg,76
67
+ magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/cord.py,sha256=jR_lRZxy8SeEvTK3FdlXmQHF0kefJf7ZqwM_8pvyI5E,8153
68
+ magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/data_collator.py,sha256=M2TE47BprHSuQJYcoMeWOSpqkr_nh8VK6t2l26XWmxg,6279
69
+ magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/funsd.py,sha256=Ez9tMeruHncJlkKQ7iRGBB9Pk1uWtgxlGeqs-sOmIG0,5214
70
+ magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/image_utils.py,sha256=vuNOMzYw_h7jmaD2XUqkGlrjDEPB7XUts16GRICBmG4,10334
71
+ magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/xfund.py,sha256=6jLKyc_4VhbHY4YEzBXm5RkPdsd9ldnUGXFZBLiJ-_s,8270
72
+ magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/__init__.py,sha256=d5bm3Rx-jTrgfJDWrzD7t5R5CdHfug9dCNvUEneIYW4,190
73
+ magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__init__.py,sha256=a04w_C0B4P9jF-3I_tXCj3fLmfFQR5XSKGbhgGm--pM,1216
74
+ magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/configuration_layoutlmv3.py,sha256=CJBcAmmLeRFVMN1YjWefoUW7hk0KXek0Eb_tergKl4Y,2150
75
+ magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/modeling_layoutlmv3.py,sha256=mdo8tO-DrJcv0Lbk9Pp98n3NQXYOnFFyXQWjU7t35kA,54633
76
+ magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py,sha256=diKlrfxYjKAmYrUgjYdx-FXLh-swShC3tl-EBX1b3oI,1197
77
+ magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py,sha256=0lxiG69_fGpSSBYA9CBLnDa_qqa1rInZ0pJpqBwZ0Yw,1372
55
78
  magic_pdf/para/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
79
  magic_pdf/para/block_continuation_processor.py,sha256=IkReB5hirjm0OAirNzQQpnWe2f2bdP3Hug3Ef8qTRDA,22749
57
80
  magic_pdf/para/block_termination_processor.py,sha256=YU3ZYqJy9e3OQmOuQYZrR6AUpmAlQ0mhj0PgZZPZ_fM,17957
@@ -66,10 +89,10 @@ magic_pdf/para/para_split_v2.py,sha256=jGOhsubdh_CEgSv9WMNmp1loq1YNlpcAj3yh3g0gP
66
89
  magic_pdf/para/raw_processor.py,sha256=mHxD9FrdOSXH7NqM41s55URyCyuyACvm9kKtowkIb3k,6317
67
90
  magic_pdf/para/stats.py,sha256=-6Pf9Y8jkP1uJOYWiHUjw9Lb-Fb9GY7MHr_ok7x2GX0,9731
68
91
  magic_pdf/para/title_processor.py,sha256=pYZv9vEkIjAtCz8jIUtl9AVUy_ib5SdAZmMVoZtsMRI,38593
69
- magic_pdf/pipe/AbsPipe.py,sha256=28e3HxybBO86npy_L4WD6F7hfjKHHc86-IhiwzAnLdk,3979
70
- magic_pdf/pipe/OCRPipe.py,sha256=iKnNveVfsrBGl_2Xtd4hAAS5HntYyjwfBeVIKGc8V5U,1196
71
- magic_pdf/pipe/TXTPipe.py,sha256=R0UzMZ7Z_59Vh7cPdBAO4gvHtgA5wLoODnCPnpEjbPM,1255
72
- magic_pdf/pipe/UNIPipe.py,sha256=47a9jx1a_zO4m3sVnhcOnrmNc_QT-TI-9mv2x7L6SrQ,3507
92
+ magic_pdf/pipe/AbsPipe.py,sha256=rMZd0FRTxGWt-7MZNmjgI1bKXlmSb9ZTA6A9fhEE7Gk,4131
93
+ magic_pdf/pipe/OCRPipe.py,sha256=55VGQVxxjunnmt3L8tYlo9A8y3vVB1JRCO9wMQtk-N0,1317
94
+ magic_pdf/pipe/TXTPipe.py,sha256=2Xn0fDDbLm2qW6xtXXHsNwXlAKnMHFbiIgnP1J2zNh8,1376
95
+ magic_pdf/pipe/UNIPipe.py,sha256=0w1XLmUQUxvqm3BaVB800pZIeLiDD3NGvQb32OcI0Fg,3587
73
96
  magic_pdf/pipe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
97
  magic_pdf/post_proc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
75
98
  magic_pdf/post_proc/detect_para.py,sha256=5LX86ueHQGOV9CNimAxqZH4R3KTi78leum1de_Na0pw,126181
@@ -104,6 +127,9 @@ magic_pdf/pre_proc/remove_rotate_bbox.py,sha256=0FlBXeiEwjZAGAWo-DiMptclFOj04POu
104
127
  magic_pdf/pre_proc/resolve_bbox_conflict.py,sha256=bJiegofPUeDyi--oZjfipQ5Q5RLm6TOCW0TLXbPii_Q,7307
105
128
  magic_pdf/pre_proc/solve_line_alien.py,sha256=aNoQptPcC38Sm1I2ABhgw8jeH_5kjsRHx3VYlFFtm1g,853
106
129
  magic_pdf/pre_proc/statistics.py,sha256=_9jGlXq0iXd03UMxB92ZqCiu7cjNkG5vHvFlTF_9ytA,220
130
+ magic_pdf/resources/model_config/model_configs.yaml,sha256=C_9UfFMlHOX-iSgcwCHjyHKazKKuwpy1RcGHeTQD1kY,139
131
+ magic_pdf/resources/model_config/UniMERNet/demo.yaml,sha256=al9_--m3n2j9zEn9OjlmmpfQbqVBAYFakXc_hY4vDXo,807
132
+ magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml,sha256=noqVE3GmZUG86NYDfs9DiFgdQFjXaICyCU7KPzgR3II,6174
107
133
  magic_pdf/rw/AbsReaderWriter.py,sha256=1Hd6Xo2g12CaRAo5Sze-R_GSQA6GQ0rQwSmgQvw4V_c,1297
108
134
  magic_pdf/rw/DiskReaderWriter.py,sha256=0tt8lbRyqrOfFgGlhjt24YMdj2xN7QUIVysfhFIxPgo,2113
109
135
  magic_pdf/rw/S3ReaderWriter.py,sha256=O7Quf3CUqXBjMz4sIE7kNVI3TIQROeg5PuXneAacieY,4474
@@ -115,9 +141,9 @@ magic_pdf/train_utils/convert_to_train_format.py,sha256=ifo2FAoBMa_etCvz0O4v03xO
115
141
  magic_pdf/train_utils/extract_caption.py,sha256=gommEqIEWLplSDEJWD7_66daqlOBsWhpRBW1DHpkny4,1825
116
142
  magic_pdf/train_utils/remove_footer_header.py,sha256=pyeNNdJ-th3wl5Xwb10ZLYNaFN4-6BmahoMFE8VTNNs,5978
117
143
  magic_pdf/train_utils/vis_utils.py,sha256=MV9N9cT3ifJ35u7LFKGF9I_bOIQrtU1zcsxu2hj3aqM,10111
118
- magic_pdf-0.5.13.dist-info/LICENSE.md,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
119
- magic_pdf-0.5.13.dist-info/METADATA,sha256=g5VqQbFmBpLwZyVNivClRek2vVoBAGwhjuT8Tnq3Wtc,6673
120
- magic_pdf-0.5.13.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
121
- magic_pdf-0.5.13.dist-info/entry_points.txt,sha256=NbSkSmE08UuTwdoJD8Uofq8iyufySA4x7jmIIk4YCzI,57
122
- magic_pdf-0.5.13.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
123
- magic_pdf-0.5.13.dist-info/RECORD,,
144
+ magic_pdf-0.6.0.dist-info/LICENSE.md,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
145
+ magic_pdf-0.6.0.dist-info/METADATA,sha256=rqkC7PCOuqDzqnsVWW8DSqwogF3jHwGp6-sS8xjCi6o,7093
146
+ magic_pdf-0.6.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
147
+ magic_pdf-0.6.0.dist-info/entry_points.txt,sha256=NbSkSmE08UuTwdoJD8Uofq8iyufySA4x7jmIIk4YCzI,57
148
+ magic_pdf-0.6.0.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
149
+ magic_pdf-0.6.0.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- from ultralytics import YOLO
2
-
3
- image_path = '' # 待预测图片路径
4
- model_path = '' # 权重路径
5
- model = YOLO(model_path)
6
-
7
- result = model(image_path, save=True, conf=0.5, save_crop=False, line_width=2)
8
- print(result)