magic-pdf 0.5.13__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- magic_pdf/cli/magicpdf.py +18 -7
- magic_pdf/libs/config_reader.py +10 -0
- magic_pdf/libs/version.py +1 -1
- magic_pdf/model/__init__.py +1 -0
- magic_pdf/model/doc_analyze_by_custom_model.py +38 -15
- magic_pdf/model/model_list.py +1 -0
- magic_pdf/model/pdf_extract_kit.py +196 -0
- magic_pdf/model/pek_sub_modules/__init__.py +0 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/__init__.py +0 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/backbone.py +179 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/beit.py +671 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/deit.py +476 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/__init__.py +7 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/__init__.py +2 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/cord.py +171 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/data_collator.py +124 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/funsd.py +136 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/image_utils.py +284 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/xfund.py +213 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/__init__.py +7 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__init__.py +24 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/configuration_layoutlmv3.py +60 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/modeling_layoutlmv3.py +1282 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py +32 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py +34 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/model_init.py +150 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/rcnn_vl.py +163 -0
- magic_pdf/model/pek_sub_modules/layoutlmv3/visualizer.py +1236 -0
- magic_pdf/model/pek_sub_modules/post_process.py +36 -0
- magic_pdf/model/pek_sub_modules/self_modify.py +260 -0
- magic_pdf/model/pp_structure_v2.py +7 -0
- magic_pdf/pipe/AbsPipe.py +8 -14
- magic_pdf/pipe/OCRPipe.py +12 -8
- magic_pdf/pipe/TXTPipe.py +12 -8
- magic_pdf/pipe/UNIPipe.py +9 -7
- magic_pdf/resources/model_config/UniMERNet/demo.yaml +46 -0
- magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml +351 -0
- magic_pdf/resources/model_config/model_configs.yaml +9 -0
- {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/METADATA +18 -8
- {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/RECORD +44 -18
- magic_pdf/model/360_layout_analysis.py +0 -8
- {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/LICENSE.md +0 -0
- {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/WHEEL +0 -0
- {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/entry_points.txt +0 -0
- {magic_pdf-0.5.13.dist-info → magic_pdf-0.6.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,351 @@
|
|
1
|
+
AUG:
|
2
|
+
DETR: true
|
3
|
+
CACHE_DIR: /mnt/localdata/users/yupanhuang/cache/huggingface
|
4
|
+
CUDNN_BENCHMARK: false
|
5
|
+
DATALOADER:
|
6
|
+
ASPECT_RATIO_GROUPING: true
|
7
|
+
FILTER_EMPTY_ANNOTATIONS: false
|
8
|
+
NUM_WORKERS: 4
|
9
|
+
REPEAT_THRESHOLD: 0.0
|
10
|
+
SAMPLER_TRAIN: TrainingSampler
|
11
|
+
DATASETS:
|
12
|
+
PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
|
13
|
+
PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
|
14
|
+
PROPOSAL_FILES_TEST: []
|
15
|
+
PROPOSAL_FILES_TRAIN: []
|
16
|
+
TEST:
|
17
|
+
- scihub_train
|
18
|
+
TRAIN:
|
19
|
+
- scihub_train
|
20
|
+
GLOBAL:
|
21
|
+
HACK: 1.0
|
22
|
+
ICDAR_DATA_DIR_TEST: ''
|
23
|
+
ICDAR_DATA_DIR_TRAIN: ''
|
24
|
+
INPUT:
|
25
|
+
CROP:
|
26
|
+
ENABLED: true
|
27
|
+
SIZE:
|
28
|
+
- 384
|
29
|
+
- 600
|
30
|
+
TYPE: absolute_range
|
31
|
+
FORMAT: RGB
|
32
|
+
MASK_FORMAT: polygon
|
33
|
+
MAX_SIZE_TEST: 1333
|
34
|
+
MAX_SIZE_TRAIN: 1333
|
35
|
+
MIN_SIZE_TEST: 800
|
36
|
+
MIN_SIZE_TRAIN:
|
37
|
+
- 480
|
38
|
+
- 512
|
39
|
+
- 544
|
40
|
+
- 576
|
41
|
+
- 608
|
42
|
+
- 640
|
43
|
+
- 672
|
44
|
+
- 704
|
45
|
+
- 736
|
46
|
+
- 768
|
47
|
+
- 800
|
48
|
+
MIN_SIZE_TRAIN_SAMPLING: choice
|
49
|
+
RANDOM_FLIP: horizontal
|
50
|
+
MODEL:
|
51
|
+
ANCHOR_GENERATOR:
|
52
|
+
ANGLES:
|
53
|
+
- - -90
|
54
|
+
- 0
|
55
|
+
- 90
|
56
|
+
ASPECT_RATIOS:
|
57
|
+
- - 0.5
|
58
|
+
- 1.0
|
59
|
+
- 2.0
|
60
|
+
NAME: DefaultAnchorGenerator
|
61
|
+
OFFSET: 0.0
|
62
|
+
SIZES:
|
63
|
+
- - 32
|
64
|
+
- - 64
|
65
|
+
- - 128
|
66
|
+
- - 256
|
67
|
+
- - 512
|
68
|
+
BACKBONE:
|
69
|
+
FREEZE_AT: 2
|
70
|
+
NAME: build_vit_fpn_backbone
|
71
|
+
CONFIG_PATH: ''
|
72
|
+
DEVICE: cuda
|
73
|
+
FPN:
|
74
|
+
FUSE_TYPE: sum
|
75
|
+
IN_FEATURES:
|
76
|
+
- layer3
|
77
|
+
- layer5
|
78
|
+
- layer7
|
79
|
+
- layer11
|
80
|
+
NORM: ''
|
81
|
+
OUT_CHANNELS: 256
|
82
|
+
IMAGE_ONLY: true
|
83
|
+
KEYPOINT_ON: false
|
84
|
+
LOAD_PROPOSALS: false
|
85
|
+
MASK_ON: true
|
86
|
+
META_ARCHITECTURE: VLGeneralizedRCNN
|
87
|
+
PANOPTIC_FPN:
|
88
|
+
COMBINE:
|
89
|
+
ENABLED: true
|
90
|
+
INSTANCES_CONFIDENCE_THRESH: 0.5
|
91
|
+
OVERLAP_THRESH: 0.5
|
92
|
+
STUFF_AREA_LIMIT: 4096
|
93
|
+
INSTANCE_LOSS_WEIGHT: 1.0
|
94
|
+
PIXEL_MEAN:
|
95
|
+
- 127.5
|
96
|
+
- 127.5
|
97
|
+
- 127.5
|
98
|
+
PIXEL_STD:
|
99
|
+
- 127.5
|
100
|
+
- 127.5
|
101
|
+
- 127.5
|
102
|
+
PROPOSAL_GENERATOR:
|
103
|
+
MIN_SIZE: 0
|
104
|
+
NAME: RPN
|
105
|
+
RESNETS:
|
106
|
+
DEFORM_MODULATED: false
|
107
|
+
DEFORM_NUM_GROUPS: 1
|
108
|
+
DEFORM_ON_PER_STAGE:
|
109
|
+
- false
|
110
|
+
- false
|
111
|
+
- false
|
112
|
+
- false
|
113
|
+
DEPTH: 50
|
114
|
+
NORM: FrozenBN
|
115
|
+
NUM_GROUPS: 1
|
116
|
+
OUT_FEATURES:
|
117
|
+
- res4
|
118
|
+
RES2_OUT_CHANNELS: 256
|
119
|
+
RES5_DILATION: 1
|
120
|
+
STEM_OUT_CHANNELS: 64
|
121
|
+
STRIDE_IN_1X1: true
|
122
|
+
WIDTH_PER_GROUP: 64
|
123
|
+
RETINANET:
|
124
|
+
BBOX_REG_LOSS_TYPE: smooth_l1
|
125
|
+
BBOX_REG_WEIGHTS:
|
126
|
+
- 1.0
|
127
|
+
- 1.0
|
128
|
+
- 1.0
|
129
|
+
- 1.0
|
130
|
+
FOCAL_LOSS_ALPHA: 0.25
|
131
|
+
FOCAL_LOSS_GAMMA: 2.0
|
132
|
+
IN_FEATURES:
|
133
|
+
- p3
|
134
|
+
- p4
|
135
|
+
- p5
|
136
|
+
- p6
|
137
|
+
- p7
|
138
|
+
IOU_LABELS:
|
139
|
+
- 0
|
140
|
+
- -1
|
141
|
+
- 1
|
142
|
+
IOU_THRESHOLDS:
|
143
|
+
- 0.4
|
144
|
+
- 0.5
|
145
|
+
NMS_THRESH_TEST: 0.5
|
146
|
+
NORM: ''
|
147
|
+
NUM_CLASSES: 10
|
148
|
+
NUM_CONVS: 4
|
149
|
+
PRIOR_PROB: 0.01
|
150
|
+
SCORE_THRESH_TEST: 0.05
|
151
|
+
SMOOTH_L1_LOSS_BETA: 0.1
|
152
|
+
TOPK_CANDIDATES_TEST: 1000
|
153
|
+
ROI_BOX_CASCADE_HEAD:
|
154
|
+
BBOX_REG_WEIGHTS:
|
155
|
+
- - 10.0
|
156
|
+
- 10.0
|
157
|
+
- 5.0
|
158
|
+
- 5.0
|
159
|
+
- - 20.0
|
160
|
+
- 20.0
|
161
|
+
- 10.0
|
162
|
+
- 10.0
|
163
|
+
- - 30.0
|
164
|
+
- 30.0
|
165
|
+
- 15.0
|
166
|
+
- 15.0
|
167
|
+
IOUS:
|
168
|
+
- 0.5
|
169
|
+
- 0.6
|
170
|
+
- 0.7
|
171
|
+
ROI_BOX_HEAD:
|
172
|
+
BBOX_REG_LOSS_TYPE: smooth_l1
|
173
|
+
BBOX_REG_LOSS_WEIGHT: 1.0
|
174
|
+
BBOX_REG_WEIGHTS:
|
175
|
+
- 10.0
|
176
|
+
- 10.0
|
177
|
+
- 5.0
|
178
|
+
- 5.0
|
179
|
+
CLS_AGNOSTIC_BBOX_REG: true
|
180
|
+
CONV_DIM: 256
|
181
|
+
FC_DIM: 1024
|
182
|
+
NAME: FastRCNNConvFCHead
|
183
|
+
NORM: ''
|
184
|
+
NUM_CONV: 0
|
185
|
+
NUM_FC: 2
|
186
|
+
POOLER_RESOLUTION: 7
|
187
|
+
POOLER_SAMPLING_RATIO: 0
|
188
|
+
POOLER_TYPE: ROIAlignV2
|
189
|
+
SMOOTH_L1_BETA: 0.0
|
190
|
+
TRAIN_ON_PRED_BOXES: false
|
191
|
+
ROI_HEADS:
|
192
|
+
BATCH_SIZE_PER_IMAGE: 512
|
193
|
+
IN_FEATURES:
|
194
|
+
- p2
|
195
|
+
- p3
|
196
|
+
- p4
|
197
|
+
- p5
|
198
|
+
IOU_LABELS:
|
199
|
+
- 0
|
200
|
+
- 1
|
201
|
+
IOU_THRESHOLDS:
|
202
|
+
- 0.5
|
203
|
+
NAME: CascadeROIHeads
|
204
|
+
NMS_THRESH_TEST: 0.5
|
205
|
+
NUM_CLASSES: 10
|
206
|
+
POSITIVE_FRACTION: 0.25
|
207
|
+
PROPOSAL_APPEND_GT: true
|
208
|
+
SCORE_THRESH_TEST: 0.05
|
209
|
+
ROI_KEYPOINT_HEAD:
|
210
|
+
CONV_DIMS:
|
211
|
+
- 512
|
212
|
+
- 512
|
213
|
+
- 512
|
214
|
+
- 512
|
215
|
+
- 512
|
216
|
+
- 512
|
217
|
+
- 512
|
218
|
+
- 512
|
219
|
+
LOSS_WEIGHT: 1.0
|
220
|
+
MIN_KEYPOINTS_PER_IMAGE: 1
|
221
|
+
NAME: KRCNNConvDeconvUpsampleHead
|
222
|
+
NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
|
223
|
+
NUM_KEYPOINTS: 17
|
224
|
+
POOLER_RESOLUTION: 14
|
225
|
+
POOLER_SAMPLING_RATIO: 0
|
226
|
+
POOLER_TYPE: ROIAlignV2
|
227
|
+
ROI_MASK_HEAD:
|
228
|
+
CLS_AGNOSTIC_MASK: false
|
229
|
+
CONV_DIM: 256
|
230
|
+
NAME: MaskRCNNConvUpsampleHead
|
231
|
+
NORM: ''
|
232
|
+
NUM_CONV: 4
|
233
|
+
POOLER_RESOLUTION: 14
|
234
|
+
POOLER_SAMPLING_RATIO: 0
|
235
|
+
POOLER_TYPE: ROIAlignV2
|
236
|
+
RPN:
|
237
|
+
BATCH_SIZE_PER_IMAGE: 256
|
238
|
+
BBOX_REG_LOSS_TYPE: smooth_l1
|
239
|
+
BBOX_REG_LOSS_WEIGHT: 1.0
|
240
|
+
BBOX_REG_WEIGHTS:
|
241
|
+
- 1.0
|
242
|
+
- 1.0
|
243
|
+
- 1.0
|
244
|
+
- 1.0
|
245
|
+
BOUNDARY_THRESH: -1
|
246
|
+
CONV_DIMS:
|
247
|
+
- -1
|
248
|
+
HEAD_NAME: StandardRPNHead
|
249
|
+
IN_FEATURES:
|
250
|
+
- p2
|
251
|
+
- p3
|
252
|
+
- p4
|
253
|
+
- p5
|
254
|
+
- p6
|
255
|
+
IOU_LABELS:
|
256
|
+
- 0
|
257
|
+
- -1
|
258
|
+
- 1
|
259
|
+
IOU_THRESHOLDS:
|
260
|
+
- 0.3
|
261
|
+
- 0.7
|
262
|
+
LOSS_WEIGHT: 1.0
|
263
|
+
NMS_THRESH: 0.7
|
264
|
+
POSITIVE_FRACTION: 0.5
|
265
|
+
POST_NMS_TOPK_TEST: 1000
|
266
|
+
POST_NMS_TOPK_TRAIN: 2000
|
267
|
+
PRE_NMS_TOPK_TEST: 1000
|
268
|
+
PRE_NMS_TOPK_TRAIN: 2000
|
269
|
+
SMOOTH_L1_BETA: 0.0
|
270
|
+
SEM_SEG_HEAD:
|
271
|
+
COMMON_STRIDE: 4
|
272
|
+
CONVS_DIM: 128
|
273
|
+
IGNORE_VALUE: 255
|
274
|
+
IN_FEATURES:
|
275
|
+
- p2
|
276
|
+
- p3
|
277
|
+
- p4
|
278
|
+
- p5
|
279
|
+
LOSS_WEIGHT: 1.0
|
280
|
+
NAME: SemSegFPNHead
|
281
|
+
NORM: GN
|
282
|
+
NUM_CLASSES: 10
|
283
|
+
VIT:
|
284
|
+
DROP_PATH: 0.1
|
285
|
+
IMG_SIZE:
|
286
|
+
- 224
|
287
|
+
- 224
|
288
|
+
NAME: layoutlmv3_base
|
289
|
+
OUT_FEATURES:
|
290
|
+
- layer3
|
291
|
+
- layer5
|
292
|
+
- layer7
|
293
|
+
- layer11
|
294
|
+
POS_TYPE: abs
|
295
|
+
WEIGHTS:
|
296
|
+
OUTPUT_DIR:
|
297
|
+
SCIHUB_DATA_DIR_TRAIN: /mnt/petrelfs/share_data/zhaozhiyuan/publaynet/layout_scihub/train
|
298
|
+
SEED: 42
|
299
|
+
SOLVER:
|
300
|
+
AMP:
|
301
|
+
ENABLED: true
|
302
|
+
BACKBONE_MULTIPLIER: 1.0
|
303
|
+
BASE_LR: 0.0002
|
304
|
+
BIAS_LR_FACTOR: 1.0
|
305
|
+
CHECKPOINT_PERIOD: 2000
|
306
|
+
CLIP_GRADIENTS:
|
307
|
+
CLIP_TYPE: full_model
|
308
|
+
CLIP_VALUE: 1.0
|
309
|
+
ENABLED: true
|
310
|
+
NORM_TYPE: 2.0
|
311
|
+
GAMMA: 0.1
|
312
|
+
GRADIENT_ACCUMULATION_STEPS: 1
|
313
|
+
IMS_PER_BATCH: 32
|
314
|
+
LR_SCHEDULER_NAME: WarmupCosineLR
|
315
|
+
MAX_ITER: 20000
|
316
|
+
MOMENTUM: 0.9
|
317
|
+
NESTEROV: false
|
318
|
+
OPTIMIZER: ADAMW
|
319
|
+
REFERENCE_WORLD_SIZE: 0
|
320
|
+
STEPS:
|
321
|
+
- 10000
|
322
|
+
WARMUP_FACTOR: 0.01
|
323
|
+
WARMUP_ITERS: 333
|
324
|
+
WARMUP_METHOD: linear
|
325
|
+
WEIGHT_DECAY: 0.05
|
326
|
+
WEIGHT_DECAY_BIAS: null
|
327
|
+
WEIGHT_DECAY_NORM: 0.0
|
328
|
+
TEST:
|
329
|
+
AUG:
|
330
|
+
ENABLED: false
|
331
|
+
FLIP: true
|
332
|
+
MAX_SIZE: 4000
|
333
|
+
MIN_SIZES:
|
334
|
+
- 400
|
335
|
+
- 500
|
336
|
+
- 600
|
337
|
+
- 700
|
338
|
+
- 800
|
339
|
+
- 900
|
340
|
+
- 1000
|
341
|
+
- 1100
|
342
|
+
- 1200
|
343
|
+
DETECTIONS_PER_IMAGE: 100
|
344
|
+
EVAL_PERIOD: 1000
|
345
|
+
EXPECTED_RESULTS: []
|
346
|
+
KEYPOINT_OKS_SIGMAS: []
|
347
|
+
PRECISE_BN:
|
348
|
+
ENABLED: false
|
349
|
+
NUM_ITER: 200
|
350
|
+
VERSION: 2
|
351
|
+
VIS_PERIOD: 0
|
@@ -1,8 +1,8 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: magic-pdf
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.6.0
|
4
4
|
Summary: A practical tool for converting PDF to Markdown
|
5
|
-
Home-page: https://github.com/
|
5
|
+
Home-page: https://github.com/opendatalab/MinerU
|
6
6
|
Requires-Python: >=3.9
|
7
7
|
Description-Content-Type: text/markdown
|
8
8
|
License-File: LICENSE.md
|
@@ -12,16 +12,21 @@ Requires-Dist: click >=8.1.7
|
|
12
12
|
Requires-Dist: PyMuPDF >=1.24.7
|
13
13
|
Requires-Dist: loguru >=0.6.0
|
14
14
|
Requires-Dist: numpy >=1.21.6
|
15
|
-
Requires-Dist: fast-langdetect >=0.
|
15
|
+
Requires-Dist: fast-langdetect >=0.2.1
|
16
16
|
Requires-Dist: wordninja >=2.0.0
|
17
17
|
Requires-Dist: scikit-learn >=1.0.2
|
18
18
|
Requires-Dist: pdfminer.six >=20231228
|
19
|
-
Requires-Dist: numpy <2.0.0
|
20
19
|
Provides-Extra: cpu
|
21
|
-
Requires-Dist: paddleocr ; extra == 'cpu'
|
20
|
+
Requires-Dist: paddleocr ==2.7.3 ; extra == 'cpu'
|
22
21
|
Requires-Dist: paddlepaddle ; extra == 'cpu'
|
22
|
+
Provides-Extra: full-cpu
|
23
|
+
Requires-Dist: unimernet ; extra == 'full-cpu'
|
24
|
+
Requires-Dist: matplotlib ; extra == 'full-cpu'
|
25
|
+
Requires-Dist: ultralytics ; extra == 'full-cpu'
|
26
|
+
Requires-Dist: paddleocr ==2.7.3 ; extra == 'full-cpu'
|
27
|
+
Requires-Dist: paddlepaddle ; extra == 'full-cpu'
|
23
28
|
Provides-Extra: gpu
|
24
|
-
Requires-Dist: paddleocr ; extra == 'gpu'
|
29
|
+
Requires-Dist: paddleocr ==2.7.3 ; extra == 'gpu'
|
25
30
|
Requires-Dist: paddlepaddle-gpu ; extra == 'gpu'
|
26
31
|
|
27
32
|
<div id="top"></div>
|
@@ -29,9 +34,14 @@ Requires-Dist: paddlepaddle-gpu ; extra == 'gpu'
|
|
29
34
|
|
30
35
|
[](https://github.com/opendatalab/MinerU)
|
31
36
|
[](https://github.com/opendatalab/MinerU)
|
32
|
-
[](https://github.com/opendatalab/MinerU/tree/main/LICENSE)
|
33
|
-
[](https://github.com/opendatalab/MinerU/issues)
|
34
37
|
[](https://github.com/opendatalab/MinerU/issues)
|
38
|
+
[](https://github.com/opendatalab/MinerU/issues)
|
39
|
+
[](https://badge.fury.io/py/magic-pdf)
|
40
|
+
[](https://pepy.tech/project/magic-pdf)
|
41
|
+
[](https://pepy.tech/project/magic-pdf)
|
42
|
+
|
43
|
+
|
44
|
+
|
35
45
|
|
36
46
|
[English](README.md) | [简体中文](README_zh-CN.md)
|
37
47
|
|
@@ -5,7 +5,7 @@ magic_pdf/pdf_parse_for_train.py,sha256=Oby61DMjJ716Jj_ri7lwXfv2Chus0pbBR2RPXrmB
|
|
5
5
|
magic_pdf/pdf_parse_union_core.py,sha256=a67iQuEfuslAEF-wQplGZKXUuz5mT3HiCyvuR52E6Gw,10584
|
6
6
|
magic_pdf/user_api.py,sha256=CVQH-VSiZpz0bSkyMT4czk1epZriIPSJsLsPbluPa9Q,3054
|
7
7
|
magic_pdf/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
|
-
magic_pdf/cli/magicpdf.py,sha256=
|
8
|
+
magic_pdf/cli/magicpdf.py,sha256=EcTiX-MaiDc4Fv9qZ_UdjHt5tYnBEu6vlbp0w030sA0,12691
|
9
9
|
magic_pdf/dict2md/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
10
|
magic_pdf/dict2md/mkcontent.py,sha256=rWUY-2opd0jeowEUEVOV_uWcKum1Q7ng4nOoT6-ka_s,17459
|
11
11
|
magic_pdf/dict2md/ocr_mkcontent.py,sha256=RyxebPtvFfNce_HCa-_YGxwFx_srzL-BfMKc85V9JG0,15442
|
@@ -25,7 +25,7 @@ magic_pdf/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
25
|
magic_pdf/libs/boxbase.py,sha256=MvD0DypR4sTEF3T2RrI_yJ8mPDUBYHAqAaau2mnBSxY,15343
|
26
26
|
magic_pdf/libs/calc_span_stats.py,sha256=5vnU27DcbkFDRSAoLqAmX0KQ3I9ehWkEgh_t9hxg_zI,10147
|
27
27
|
magic_pdf/libs/commons.py,sha256=6Zu9-OyamyCNDY7qj0SxR-rux-ggj9im3CVPtC4ubB8,7108
|
28
|
-
magic_pdf/libs/config_reader.py,sha256=
|
28
|
+
magic_pdf/libs/config_reader.py,sha256=wB0Zn6qEwuAWmv2Icz9owPIqxrhFEH5i6sUr8Nt5ULo,1806
|
29
29
|
magic_pdf/libs/convert_utils.py,sha256=Ov-lsfCLBPz_15iSJXIslBNmrSf_E_1g_XDWJy8NgO8,143
|
30
30
|
magic_pdf/libs/coordinate_transform.py,sha256=Bbop2cP2uz2ZG0U0gwd7J6EKkgABq5Rv03qf2LMPw80,429
|
31
31
|
magic_pdf/libs/detect_language_from_model.py,sha256=Uln8F9qs8EJOw4EgI7KRlaU3lD_mK8KMTlADLFtz8fk,816
|
@@ -44,14 +44,37 @@ magic_pdf/libs/pdf_check.py,sha256=MAe8wzwT0qvPf_I72wEZG7k1g4haNHS7oUtLqkB5rlE,2
|
|
44
44
|
magic_pdf/libs/pdf_image_tools.py,sha256=CAd01giTKr_UJz1_QtDOARG9G9z69GFpzRZwcWSfLtE,1282
|
45
45
|
magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
|
46
46
|
magic_pdf/libs/textbase.py,sha256=SC1Frhz3Fb7V7n2SFRBsl7Bmg0JZdlvZskq0lfW1vIk,732
|
47
|
-
magic_pdf/libs/version.py,sha256=
|
47
|
+
magic_pdf/libs/version.py,sha256=cID1jLnC_vj48GgMN6Yb1FA3JsQ95zNmCHmRYE8TFhY,22
|
48
48
|
magic_pdf/libs/vis_utils.py,sha256=hTOTEakKV0pGMbk0tbRkVI_tku7A3dGc96ynObZ4kwI,10207
|
49
|
-
magic_pdf/model/
|
50
|
-
magic_pdf/model/
|
51
|
-
magic_pdf/model/doc_analyze_by_custom_model.py,sha256=8z4NX7Lk7CcPl1BQiNYL6dDiP63M3f6m3dmW6rjHCqg,2370
|
49
|
+
magic_pdf/model/__init__.py,sha256=XeYcF4RMZ3DosyLqiz0_n1JVa2k5RhTwUXwKt5sAjEQ,53
|
50
|
+
magic_pdf/model/doc_analyze_by_custom_model.py,sha256=kssz_Nn6zTYED_iEgGuFRjus947xoK5dTqj88FOehE0,3256
|
52
51
|
magic_pdf/model/magic_model.py,sha256=2H6Gz1mg0f0YCvz-TLIWrAWXCQLgZftBXJNRPlSIjwc,25077
|
53
|
-
magic_pdf/model/model_list.py,sha256=
|
54
|
-
magic_pdf/model/
|
52
|
+
magic_pdf/model/model_list.py,sha256=AqxAtKGLDn7VVXWYwk0l9LnACxDLyU2jwOJ7vjPZj04,72
|
53
|
+
magic_pdf/model/pdf_extract_kit.py,sha256=hiK1zDrwn5QhqUwI7BvM1JOoq_JIab4uVx_flHrBmWE,8374
|
54
|
+
magic_pdf/model/pp_structure_v2.py,sha256=apYWwWiCjlks5CLXolcynnuPV7llCm2PdP-6tg0-Kt0,2903
|
55
|
+
magic_pdf/model/pek_sub_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
56
|
+
magic_pdf/model/pek_sub_modules/post_process.py,sha256=HzRxV2sVR3Qo8XKYEHhT6tae-bYTb6dnAfGP6gfVNaM,1135
|
57
|
+
magic_pdf/model/pek_sub_modules/self_modify.py,sha256=XiwLUCiY_E0JkaIQr5m1hOD75-iGrgkMVe-1bzeF_Go,10522
|
58
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
59
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/backbone.py,sha256=1cvSCczgvwOLdvzWyqttoYPMHsXmnzI3w9abJ1bAXoM,7106
|
60
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/beit.py,sha256=e-INve6bpEx_0FM5wYbQcEcelc79tzDlCljTVHaGt1w,30450
|
61
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/deit.py,sha256=Qyn5UWutZ-0GJczexCh-oMMSXtav_g3ovumMFJp8Om4,17000
|
62
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/model_init.py,sha256=POs5s4_9rS-GlE7f_iHBuZpTwOuyfI6VE3DUb37fgxA,4483
|
63
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/rcnn_vl.py,sha256=nI4G6AeLRmjavNhs5S2USKh0ozn-ftMuW0F0m_eVy3c,6649
|
64
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/visualizer.py,sha256=H6UYeCCbaN2gbDjGthTkKkPoyWxfE3azRjsR7fVBwnw,49797
|
65
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/__init__.py,sha256=C4N9gXJr7is7uznvQefQ7dOhlzEhdp86Lgh-7p0Y-08,186
|
66
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/__init__.py,sha256=W7V62JOh12NdMZj2H1sde3Il0AqW2VKplmHEsLle6tg,76
|
67
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/cord.py,sha256=jR_lRZxy8SeEvTK3FdlXmQHF0kefJf7ZqwM_8pvyI5E,8153
|
68
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/data_collator.py,sha256=M2TE47BprHSuQJYcoMeWOSpqkr_nh8VK6t2l26XWmxg,6279
|
69
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/funsd.py,sha256=Ez9tMeruHncJlkKQ7iRGBB9Pk1uWtgxlGeqs-sOmIG0,5214
|
70
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/image_utils.py,sha256=vuNOMzYw_h7jmaD2XUqkGlrjDEPB7XUts16GRICBmG4,10334
|
71
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/data/xfund.py,sha256=6jLKyc_4VhbHY4YEzBXm5RkPdsd9ldnUGXFZBLiJ-_s,8270
|
72
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/__init__.py,sha256=d5bm3Rx-jTrgfJDWrzD7t5R5CdHfug9dCNvUEneIYW4,190
|
73
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/__init__.py,sha256=a04w_C0B4P9jF-3I_tXCj3fLmfFQR5XSKGbhgGm--pM,1216
|
74
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/configuration_layoutlmv3.py,sha256=CJBcAmmLeRFVMN1YjWefoUW7hk0KXek0Eb_tergKl4Y,2150
|
75
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/modeling_layoutlmv3.py,sha256=mdo8tO-DrJcv0Lbk9Pp98n3NQXYOnFFyXQWjU7t35kA,54633
|
76
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py,sha256=diKlrfxYjKAmYrUgjYdx-FXLh-swShC3tl-EBX1b3oI,1197
|
77
|
+
magic_pdf/model/pek_sub_modules/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py,sha256=0lxiG69_fGpSSBYA9CBLnDa_qqa1rInZ0pJpqBwZ0Yw,1372
|
55
78
|
magic_pdf/para/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
56
79
|
magic_pdf/para/block_continuation_processor.py,sha256=IkReB5hirjm0OAirNzQQpnWe2f2bdP3Hug3Ef8qTRDA,22749
|
57
80
|
magic_pdf/para/block_termination_processor.py,sha256=YU3ZYqJy9e3OQmOuQYZrR6AUpmAlQ0mhj0PgZZPZ_fM,17957
|
@@ -66,10 +89,10 @@ magic_pdf/para/para_split_v2.py,sha256=jGOhsubdh_CEgSv9WMNmp1loq1YNlpcAj3yh3g0gP
|
|
66
89
|
magic_pdf/para/raw_processor.py,sha256=mHxD9FrdOSXH7NqM41s55URyCyuyACvm9kKtowkIb3k,6317
|
67
90
|
magic_pdf/para/stats.py,sha256=-6Pf9Y8jkP1uJOYWiHUjw9Lb-Fb9GY7MHr_ok7x2GX0,9731
|
68
91
|
magic_pdf/para/title_processor.py,sha256=pYZv9vEkIjAtCz8jIUtl9AVUy_ib5SdAZmMVoZtsMRI,38593
|
69
|
-
magic_pdf/pipe/AbsPipe.py,sha256=
|
70
|
-
magic_pdf/pipe/OCRPipe.py,sha256=
|
71
|
-
magic_pdf/pipe/TXTPipe.py,sha256=
|
72
|
-
magic_pdf/pipe/UNIPipe.py,sha256=
|
92
|
+
magic_pdf/pipe/AbsPipe.py,sha256=rMZd0FRTxGWt-7MZNmjgI1bKXlmSb9ZTA6A9fhEE7Gk,4131
|
93
|
+
magic_pdf/pipe/OCRPipe.py,sha256=55VGQVxxjunnmt3L8tYlo9A8y3vVB1JRCO9wMQtk-N0,1317
|
94
|
+
magic_pdf/pipe/TXTPipe.py,sha256=2Xn0fDDbLm2qW6xtXXHsNwXlAKnMHFbiIgnP1J2zNh8,1376
|
95
|
+
magic_pdf/pipe/UNIPipe.py,sha256=0w1XLmUQUxvqm3BaVB800pZIeLiDD3NGvQb32OcI0Fg,3587
|
73
96
|
magic_pdf/pipe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
74
97
|
magic_pdf/post_proc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
75
98
|
magic_pdf/post_proc/detect_para.py,sha256=5LX86ueHQGOV9CNimAxqZH4R3KTi78leum1de_Na0pw,126181
|
@@ -104,6 +127,9 @@ magic_pdf/pre_proc/remove_rotate_bbox.py,sha256=0FlBXeiEwjZAGAWo-DiMptclFOj04POu
|
|
104
127
|
magic_pdf/pre_proc/resolve_bbox_conflict.py,sha256=bJiegofPUeDyi--oZjfipQ5Q5RLm6TOCW0TLXbPii_Q,7307
|
105
128
|
magic_pdf/pre_proc/solve_line_alien.py,sha256=aNoQptPcC38Sm1I2ABhgw8jeH_5kjsRHx3VYlFFtm1g,853
|
106
129
|
magic_pdf/pre_proc/statistics.py,sha256=_9jGlXq0iXd03UMxB92ZqCiu7cjNkG5vHvFlTF_9ytA,220
|
130
|
+
magic_pdf/resources/model_config/model_configs.yaml,sha256=C_9UfFMlHOX-iSgcwCHjyHKazKKuwpy1RcGHeTQD1kY,139
|
131
|
+
magic_pdf/resources/model_config/UniMERNet/demo.yaml,sha256=al9_--m3n2j9zEn9OjlmmpfQbqVBAYFakXc_hY4vDXo,807
|
132
|
+
magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml,sha256=noqVE3GmZUG86NYDfs9DiFgdQFjXaICyCU7KPzgR3II,6174
|
107
133
|
magic_pdf/rw/AbsReaderWriter.py,sha256=1Hd6Xo2g12CaRAo5Sze-R_GSQA6GQ0rQwSmgQvw4V_c,1297
|
108
134
|
magic_pdf/rw/DiskReaderWriter.py,sha256=0tt8lbRyqrOfFgGlhjt24YMdj2xN7QUIVysfhFIxPgo,2113
|
109
135
|
magic_pdf/rw/S3ReaderWriter.py,sha256=O7Quf3CUqXBjMz4sIE7kNVI3TIQROeg5PuXneAacieY,4474
|
@@ -115,9 +141,9 @@ magic_pdf/train_utils/convert_to_train_format.py,sha256=ifo2FAoBMa_etCvz0O4v03xO
|
|
115
141
|
magic_pdf/train_utils/extract_caption.py,sha256=gommEqIEWLplSDEJWD7_66daqlOBsWhpRBW1DHpkny4,1825
|
116
142
|
magic_pdf/train_utils/remove_footer_header.py,sha256=pyeNNdJ-th3wl5Xwb10ZLYNaFN4-6BmahoMFE8VTNNs,5978
|
117
143
|
magic_pdf/train_utils/vis_utils.py,sha256=MV9N9cT3ifJ35u7LFKGF9I_bOIQrtU1zcsxu2hj3aqM,10111
|
118
|
-
magic_pdf-0.
|
119
|
-
magic_pdf-0.
|
120
|
-
magic_pdf-0.
|
121
|
-
magic_pdf-0.
|
122
|
-
magic_pdf-0.
|
123
|
-
magic_pdf-0.
|
144
|
+
magic_pdf-0.6.0.dist-info/LICENSE.md,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
145
|
+
magic_pdf-0.6.0.dist-info/METADATA,sha256=rqkC7PCOuqDzqnsVWW8DSqwogF3jHwGp6-sS8xjCi6o,7093
|
146
|
+
magic_pdf-0.6.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
147
|
+
magic_pdf-0.6.0.dist-info/entry_points.txt,sha256=NbSkSmE08UuTwdoJD8Uofq8iyufySA4x7jmIIk4YCzI,57
|
148
|
+
magic_pdf-0.6.0.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
|
149
|
+
magic_pdf-0.6.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|