magic-pdf 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. magic_pdf/data/batch_build_dataset.py +156 -0
  2. magic_pdf/data/dataset.py +44 -24
  3. magic_pdf/data/utils.py +108 -9
  4. magic_pdf/dict2md/ocr_mkcontent.py +4 -3
  5. magic_pdf/libs/pdf_image_tools.py +11 -6
  6. magic_pdf/libs/performance_stats.py +12 -1
  7. magic_pdf/libs/version.py +1 -1
  8. magic_pdf/model/batch_analyze.py +175 -201
  9. magic_pdf/model/doc_analyze_by_custom_model.py +137 -92
  10. magic_pdf/model/pdf_extract_kit.py +5 -38
  11. magic_pdf/model/sub_modules/language_detection/utils.py +2 -4
  12. magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py +24 -19
  13. magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py +3 -1
  14. magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py +3 -1
  15. magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py +31 -102
  16. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/__init__.py +13 -0
  17. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/modeling_unimernet.py +189 -0
  18. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py +8 -0
  19. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py +163 -0
  20. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py +2351 -0
  21. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py +9 -0
  22. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py +132 -0
  23. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py +132 -0
  24. magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py +1084 -0
  25. magic_pdf/model/sub_modules/model_init.py +50 -37
  26. magic_pdf/model/sub_modules/model_utils.py +17 -11
  27. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/__init__.py +1 -0
  28. magic_pdf/model/sub_modules/ocr/{paddleocr → paddleocr2pytorch}/ocr_utils.py +102 -97
  29. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py +193 -0
  30. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py +39 -0
  31. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py +8 -0
  32. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py +48 -0
  33. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py +418 -0
  34. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py +25 -0
  35. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py +105 -0
  36. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py +62 -0
  37. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py +269 -0
  38. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py +290 -0
  39. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py +516 -0
  40. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py +136 -0
  41. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py +234 -0
  42. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py +638 -0
  43. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py +76 -0
  44. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py +43 -0
  45. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py +23 -0
  46. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py +109 -0
  47. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py +54 -0
  48. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py +58 -0
  49. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py +29 -0
  50. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py +456 -0
  51. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py +117 -0
  52. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py +228 -0
  53. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py +33 -0
  54. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py +20 -0
  55. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py +179 -0
  56. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py +690 -0
  57. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py +0 -0
  58. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml +383 -0
  59. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt +162 -0
  60. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt +8421 -0
  61. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt +163 -0
  62. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/devanagari_dict.txt +167 -0
  63. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt +95 -0
  64. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/japan_dict.txt +4399 -0
  65. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ka_dict.txt +153 -0
  66. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/korean_dict.txt +3688 -0
  67. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt +185 -0
  68. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt +6623 -0
  69. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt +128 -0
  70. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt +151 -0
  71. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml +49 -0
  72. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/__init__.py +1 -0
  73. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/__init__.py +1 -0
  74. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_cls.py +106 -0
  75. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_det.py +217 -0
  76. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_rec.py +440 -0
  77. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_system.py +104 -0
  78. magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/pytorchocr_utility.py +227 -0
  79. magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py +10 -18
  80. magic_pdf/pdf_parse_union_core_v2.py +112 -74
  81. magic_pdf/post_proc/para_split_v3.py +16 -13
  82. magic_pdf/pre_proc/ocr_dict_merge.py +9 -1
  83. magic_pdf/pre_proc/ocr_span_list_modify.py +51 -0
  84. magic_pdf/resources/model_config/model_configs.yaml +1 -1
  85. magic_pdf/tools/cli.py +30 -12
  86. magic_pdf/tools/common.py +90 -12
  87. {magic_pdf-1.2.1.dist-info → magic_pdf-1.3.0.dist-info}/METADATA +51 -41
  88. magic_pdf-1.3.0.dist-info/RECORD +202 -0
  89. magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_273_mod.py +0 -204
  90. magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_291_mod.py +0 -213
  91. magic_pdf/model/sub_modules/table/structeqtable/struct_eqtable.py +0 -37
  92. magic_pdf/model/sub_modules/table/tablemaster/tablemaster_paddle.py +0 -71
  93. magic_pdf/resources/model_config/UniMERNet/demo.yaml +0 -46
  94. magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml +0 -351
  95. magic_pdf-1.2.1.dist-info/RECORD +0 -147
  96. /magic_pdf/model/sub_modules/{ocr/paddleocr/__init__.py → mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py} +0 -0
  97. /magic_pdf/model/sub_modules/{table/structeqtable → ocr/paddleocr2pytorch/pytorchocr}/__init__.py +0 -0
  98. /magic_pdf/model/sub_modules/{table/tablemaster → ocr/paddleocr2pytorch/pytorchocr/modeling}/__init__.py +0 -0
  99. {magic_pdf-1.2.1.dist-info → magic_pdf-1.3.0.dist-info}/LICENSE.md +0 -0
  100. {magic_pdf-1.2.1.dist-info → magic_pdf-1.3.0.dist-info}/WHEEL +0 -0
  101. {magic_pdf-1.2.1.dist-info → magic_pdf-1.3.0.dist-info}/entry_points.txt +0 -0
  102. {magic_pdf-1.2.1.dist-info → magic_pdf-1.3.0.dist-info}/top_level.txt +0 -0
@@ -1,37 +0,0 @@
1
- import torch
2
- from struct_eqtable import build_model
3
-
4
- from magic_pdf.model.sub_modules.table.table_utils import minify_html
5
-
6
-
7
- class StructTableModel:
8
- def __init__(self, model_path, max_new_tokens=1024, max_time=60):
9
- # init
10
- assert torch.cuda.is_available(), "CUDA must be available for StructEqTable model."
11
- self.model = build_model(
12
- model_ckpt=model_path,
13
- max_new_tokens=max_new_tokens,
14
- max_time=max_time,
15
- lmdeploy=False,
16
- flash_attn=False,
17
- batch_size=1,
18
- ).cuda()
19
- self.default_format = "html"
20
-
21
- def predict(self, images, output_format=None, **kwargs):
22
-
23
- if output_format is None:
24
- output_format = self.default_format
25
- else:
26
- if output_format not in ['latex', 'markdown', 'html']:
27
- raise ValueError(f"Output format {output_format} is not supported.")
28
-
29
- results = self.model(
30
- images, output_format=output_format
31
- )
32
-
33
- if output_format == "html":
34
- results = [minify_html(html) for html in results]
35
-
36
- return results
37
-
@@ -1,71 +0,0 @@
1
- import os
2
-
3
- import cv2
4
- import numpy as np
5
- from ppstructure.table.predict_table import TableSystem
6
- from ppstructure.utility import init_args
7
- from PIL import Image
8
-
9
- from magic_pdf.config.constants import * # noqa: F403
10
-
11
-
12
- class TableMasterPaddleModel(object):
13
- """This class is responsible for converting image of table into HTML format
14
- using a pre-trained model.
15
-
16
- Attributes:
17
- - table_sys: An instance of TableSystem initialized with parsed arguments.
18
-
19
- Methods:
20
- - __init__(config): Initializes the model with configuration parameters.
21
- - img2html(image): Converts a PIL Image or NumPy array to HTML string.
22
- - parse_args(**kwargs): Parses configuration arguments.
23
- """
24
-
25
- def __init__(self, config):
26
- """
27
- Parameters:
28
- - config (dict): Configuration dictionary containing model_dir and device.
29
- """
30
- args = self.parse_args(**config)
31
- self.table_sys = TableSystem(args)
32
-
33
- def img2html(self, image):
34
- """
35
- Parameters:
36
- - image (PIL.Image or np.ndarray): The image of the table to be converted.
37
-
38
- Return:
39
- - HTML (str): A string representing the HTML structure with content of the table.
40
- """
41
- if isinstance(image, Image.Image):
42
- image = np.asarray(image)
43
- image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
44
- pred_res, _ = self.table_sys(image)
45
- pred_html = pred_res['html']
46
- # res = '<td><table border="1">' + pred_html.replace("<html><body><table>", "").replace(
47
- # "</table></body></html>","") + "</table></td>\n"
48
- return pred_html
49
-
50
- def parse_args(self, **kwargs):
51
- parser = init_args()
52
- model_dir = kwargs.get('model_dir')
53
- table_model_dir = os.path.join(model_dir, TABLE_MASTER_DIR) # noqa: F405
54
- table_char_dict_path = os.path.join(model_dir, TABLE_MASTER_DICT) # noqa: F405
55
- det_model_dir = os.path.join(model_dir, DETECT_MODEL_DIR) # noqa: F405
56
- rec_model_dir = os.path.join(model_dir, REC_MODEL_DIR) # noqa: F405
57
- rec_char_dict_path = os.path.join(model_dir, REC_CHAR_DICT) # noqa: F405
58
- device = kwargs.get('device', 'cpu')
59
- use_gpu = True if device.startswith('cuda') else False
60
- config = {
61
- 'use_gpu': use_gpu,
62
- 'table_max_len': kwargs.get('table_max_len', TABLE_MAX_LEN), # noqa: F405
63
- 'table_algorithm': 'TableMaster',
64
- 'table_model_dir': table_model_dir,
65
- 'table_char_dict_path': table_char_dict_path,
66
- 'det_model_dir': det_model_dir,
67
- 'rec_model_dir': rec_model_dir,
68
- 'rec_char_dict_path': rec_char_dict_path,
69
- }
70
- parser.set_defaults(**config)
71
- return parser.parse_args([])
@@ -1,46 +0,0 @@
1
- model:
2
- arch: unimernet
3
- model_type: unimernet
4
- model_config:
5
- model_name: ./models/unimernet_base
6
- max_seq_len: 1536
7
-
8
- load_pretrained: True
9
- pretrained: './models/unimernet_base/pytorch_model.pth'
10
- tokenizer_config:
11
- path: ./models/unimernet_base
12
-
13
- datasets:
14
- formula_rec_eval:
15
- vis_processor:
16
- eval:
17
- name: "formula_image_eval"
18
- image_size:
19
- - 192
20
- - 672
21
-
22
- run:
23
- runner: runner_iter
24
- task: unimernet_train
25
-
26
- batch_size_train: 64
27
- batch_size_eval: 64
28
- num_workers: 1
29
-
30
- iters_per_inner_epoch: 2000
31
- max_iters: 60000
32
-
33
- seed: 42
34
- output_dir: "../output/demo"
35
-
36
- evaluate: True
37
- test_splits: [ "eval" ]
38
-
39
- device: "cuda"
40
- world_size: 1
41
- dist_url: "env://"
42
- distributed: True
43
- distributed_type: ddp # or fsdp when train llm
44
-
45
- generate_cfg:
46
- temperature: 0.0
@@ -1,351 +0,0 @@
1
- AUG:
2
- DETR: true
3
- CACHE_DIR: ~/cache/huggingface
4
- CUDNN_BENCHMARK: false
5
- DATALOADER:
6
- ASPECT_RATIO_GROUPING: true
7
- FILTER_EMPTY_ANNOTATIONS: false
8
- NUM_WORKERS: 4
9
- REPEAT_THRESHOLD: 0.0
10
- SAMPLER_TRAIN: TrainingSampler
11
- DATASETS:
12
- PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
13
- PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
14
- PROPOSAL_FILES_TEST: []
15
- PROPOSAL_FILES_TRAIN: []
16
- TEST:
17
- - scihub_train
18
- TRAIN:
19
- - scihub_train
20
- GLOBAL:
21
- HACK: 1.0
22
- ICDAR_DATA_DIR_TEST: ''
23
- ICDAR_DATA_DIR_TRAIN: ''
24
- INPUT:
25
- CROP:
26
- ENABLED: true
27
- SIZE:
28
- - 384
29
- - 600
30
- TYPE: absolute_range
31
- FORMAT: RGB
32
- MASK_FORMAT: polygon
33
- MAX_SIZE_TEST: 1333
34
- MAX_SIZE_TRAIN: 1333
35
- MIN_SIZE_TEST: 800
36
- MIN_SIZE_TRAIN:
37
- - 480
38
- - 512
39
- - 544
40
- - 576
41
- - 608
42
- - 640
43
- - 672
44
- - 704
45
- - 736
46
- - 768
47
- - 800
48
- MIN_SIZE_TRAIN_SAMPLING: choice
49
- RANDOM_FLIP: horizontal
50
- MODEL:
51
- ANCHOR_GENERATOR:
52
- ANGLES:
53
- - - -90
54
- - 0
55
- - 90
56
- ASPECT_RATIOS:
57
- - - 0.5
58
- - 1.0
59
- - 2.0
60
- NAME: DefaultAnchorGenerator
61
- OFFSET: 0.0
62
- SIZES:
63
- - - 32
64
- - - 64
65
- - - 128
66
- - - 256
67
- - - 512
68
- BACKBONE:
69
- FREEZE_AT: 2
70
- NAME: build_vit_fpn_backbone
71
- CONFIG_PATH: ''
72
- DEVICE: cuda
73
- FPN:
74
- FUSE_TYPE: sum
75
- IN_FEATURES:
76
- - layer3
77
- - layer5
78
- - layer7
79
- - layer11
80
- NORM: ''
81
- OUT_CHANNELS: 256
82
- IMAGE_ONLY: true
83
- KEYPOINT_ON: false
84
- LOAD_PROPOSALS: false
85
- MASK_ON: true
86
- META_ARCHITECTURE: VLGeneralizedRCNN
87
- PANOPTIC_FPN:
88
- COMBINE:
89
- ENABLED: true
90
- INSTANCES_CONFIDENCE_THRESH: 0.5
91
- OVERLAP_THRESH: 0.5
92
- STUFF_AREA_LIMIT: 4096
93
- INSTANCE_LOSS_WEIGHT: 1.0
94
- PIXEL_MEAN:
95
- - 127.5
96
- - 127.5
97
- - 127.5
98
- PIXEL_STD:
99
- - 127.5
100
- - 127.5
101
- - 127.5
102
- PROPOSAL_GENERATOR:
103
- MIN_SIZE: 0
104
- NAME: RPN
105
- RESNETS:
106
- DEFORM_MODULATED: false
107
- DEFORM_NUM_GROUPS: 1
108
- DEFORM_ON_PER_STAGE:
109
- - false
110
- - false
111
- - false
112
- - false
113
- DEPTH: 50
114
- NORM: FrozenBN
115
- NUM_GROUPS: 1
116
- OUT_FEATURES:
117
- - res4
118
- RES2_OUT_CHANNELS: 256
119
- RES5_DILATION: 1
120
- STEM_OUT_CHANNELS: 64
121
- STRIDE_IN_1X1: true
122
- WIDTH_PER_GROUP: 64
123
- RETINANET:
124
- BBOX_REG_LOSS_TYPE: smooth_l1
125
- BBOX_REG_WEIGHTS:
126
- - 1.0
127
- - 1.0
128
- - 1.0
129
- - 1.0
130
- FOCAL_LOSS_ALPHA: 0.25
131
- FOCAL_LOSS_GAMMA: 2.0
132
- IN_FEATURES:
133
- - p3
134
- - p4
135
- - p5
136
- - p6
137
- - p7
138
- IOU_LABELS:
139
- - 0
140
- - -1
141
- - 1
142
- IOU_THRESHOLDS:
143
- - 0.4
144
- - 0.5
145
- NMS_THRESH_TEST: 0.5
146
- NORM: ''
147
- NUM_CLASSES: 10
148
- NUM_CONVS: 4
149
- PRIOR_PROB: 0.01
150
- SCORE_THRESH_TEST: 0.05
151
- SMOOTH_L1_LOSS_BETA: 0.1
152
- TOPK_CANDIDATES_TEST: 1000
153
- ROI_BOX_CASCADE_HEAD:
154
- BBOX_REG_WEIGHTS:
155
- - - 10.0
156
- - 10.0
157
- - 5.0
158
- - 5.0
159
- - - 20.0
160
- - 20.0
161
- - 10.0
162
- - 10.0
163
- - - 30.0
164
- - 30.0
165
- - 15.0
166
- - 15.0
167
- IOUS:
168
- - 0.5
169
- - 0.6
170
- - 0.7
171
- ROI_BOX_HEAD:
172
- BBOX_REG_LOSS_TYPE: smooth_l1
173
- BBOX_REG_LOSS_WEIGHT: 1.0
174
- BBOX_REG_WEIGHTS:
175
- - 10.0
176
- - 10.0
177
- - 5.0
178
- - 5.0
179
- CLS_AGNOSTIC_BBOX_REG: true
180
- CONV_DIM: 256
181
- FC_DIM: 1024
182
- NAME: FastRCNNConvFCHead
183
- NORM: ''
184
- NUM_CONV: 0
185
- NUM_FC: 2
186
- POOLER_RESOLUTION: 7
187
- POOLER_SAMPLING_RATIO: 0
188
- POOLER_TYPE: ROIAlignV2
189
- SMOOTH_L1_BETA: 0.0
190
- TRAIN_ON_PRED_BOXES: false
191
- ROI_HEADS:
192
- BATCH_SIZE_PER_IMAGE: 512
193
- IN_FEATURES:
194
- - p2
195
- - p3
196
- - p4
197
- - p5
198
- IOU_LABELS:
199
- - 0
200
- - 1
201
- IOU_THRESHOLDS:
202
- - 0.5
203
- NAME: CascadeROIHeads
204
- NMS_THRESH_TEST: 0.5
205
- NUM_CLASSES: 10
206
- POSITIVE_FRACTION: 0.25
207
- PROPOSAL_APPEND_GT: true
208
- SCORE_THRESH_TEST: 0.05
209
- ROI_KEYPOINT_HEAD:
210
- CONV_DIMS:
211
- - 512
212
- - 512
213
- - 512
214
- - 512
215
- - 512
216
- - 512
217
- - 512
218
- - 512
219
- LOSS_WEIGHT: 1.0
220
- MIN_KEYPOINTS_PER_IMAGE: 1
221
- NAME: KRCNNConvDeconvUpsampleHead
222
- NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
223
- NUM_KEYPOINTS: 17
224
- POOLER_RESOLUTION: 14
225
- POOLER_SAMPLING_RATIO: 0
226
- POOLER_TYPE: ROIAlignV2
227
- ROI_MASK_HEAD:
228
- CLS_AGNOSTIC_MASK: false
229
- CONV_DIM: 256
230
- NAME: MaskRCNNConvUpsampleHead
231
- NORM: ''
232
- NUM_CONV: 4
233
- POOLER_RESOLUTION: 14
234
- POOLER_SAMPLING_RATIO: 0
235
- POOLER_TYPE: ROIAlignV2
236
- RPN:
237
- BATCH_SIZE_PER_IMAGE: 256
238
- BBOX_REG_LOSS_TYPE: smooth_l1
239
- BBOX_REG_LOSS_WEIGHT: 1.0
240
- BBOX_REG_WEIGHTS:
241
- - 1.0
242
- - 1.0
243
- - 1.0
244
- - 1.0
245
- BOUNDARY_THRESH: -1
246
- CONV_DIMS:
247
- - -1
248
- HEAD_NAME: StandardRPNHead
249
- IN_FEATURES:
250
- - p2
251
- - p3
252
- - p4
253
- - p5
254
- - p6
255
- IOU_LABELS:
256
- - 0
257
- - -1
258
- - 1
259
- IOU_THRESHOLDS:
260
- - 0.3
261
- - 0.7
262
- LOSS_WEIGHT: 1.0
263
- NMS_THRESH: 0.7
264
- POSITIVE_FRACTION: 0.5
265
- POST_NMS_TOPK_TEST: 1000
266
- POST_NMS_TOPK_TRAIN: 2000
267
- PRE_NMS_TOPK_TEST: 1000
268
- PRE_NMS_TOPK_TRAIN: 2000
269
- SMOOTH_L1_BETA: 0.0
270
- SEM_SEG_HEAD:
271
- COMMON_STRIDE: 4
272
- CONVS_DIM: 128
273
- IGNORE_VALUE: 255
274
- IN_FEATURES:
275
- - p2
276
- - p3
277
- - p4
278
- - p5
279
- LOSS_WEIGHT: 1.0
280
- NAME: SemSegFPNHead
281
- NORM: GN
282
- NUM_CLASSES: 10
283
- VIT:
284
- DROP_PATH: 0.1
285
- IMG_SIZE:
286
- - 224
287
- - 224
288
- NAME: layoutlmv3_base
289
- OUT_FEATURES:
290
- - layer3
291
- - layer5
292
- - layer7
293
- - layer11
294
- POS_TYPE: abs
295
- WEIGHTS:
296
- OUTPUT_DIR:
297
- SCIHUB_DATA_DIR_TRAIN: ~/publaynet/layout_scihub/train
298
- SEED: 42
299
- SOLVER:
300
- AMP:
301
- ENABLED: true
302
- BACKBONE_MULTIPLIER: 1.0
303
- BASE_LR: 0.0002
304
- BIAS_LR_FACTOR: 1.0
305
- CHECKPOINT_PERIOD: 2000
306
- CLIP_GRADIENTS:
307
- CLIP_TYPE: full_model
308
- CLIP_VALUE: 1.0
309
- ENABLED: true
310
- NORM_TYPE: 2.0
311
- GAMMA: 0.1
312
- GRADIENT_ACCUMULATION_STEPS: 1
313
- IMS_PER_BATCH: 32
314
- LR_SCHEDULER_NAME: WarmupCosineLR
315
- MAX_ITER: 20000
316
- MOMENTUM: 0.9
317
- NESTEROV: false
318
- OPTIMIZER: ADAMW
319
- REFERENCE_WORLD_SIZE: 0
320
- STEPS:
321
- - 10000
322
- WARMUP_FACTOR: 0.01
323
- WARMUP_ITERS: 333
324
- WARMUP_METHOD: linear
325
- WEIGHT_DECAY: 0.05
326
- WEIGHT_DECAY_BIAS: null
327
- WEIGHT_DECAY_NORM: 0.0
328
- TEST:
329
- AUG:
330
- ENABLED: false
331
- FLIP: true
332
- MAX_SIZE: 4000
333
- MIN_SIZES:
334
- - 400
335
- - 500
336
- - 600
337
- - 700
338
- - 800
339
- - 900
340
- - 1000
341
- - 1100
342
- - 1200
343
- DETECTIONS_PER_IMAGE: 100
344
- EVAL_PERIOD: 1000
345
- EXPECTED_RESULTS: []
346
- KEYPOINT_OKS_SIGMAS: []
347
- PRECISE_BN:
348
- ENABLED: false
349
- NUM_ITER: 200
350
- VERSION: 2
351
- VIS_PERIOD: 0
@@ -1,147 +0,0 @@
1
- magic_pdf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- magic_pdf/pdf_parse_union_core_v2.py,sha256=Pt3UtPQgOrF2YudQqrwVVC767_271E-LRg2aUsiggXg,38435
3
- magic_pdf/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- magic_pdf/config/constants.py,sha256=fXGzANULnJWLPxwYp3BEFWx-rnorzpySMx63ffyEyq4,1272
5
- magic_pdf/config/drop_reason.py,sha256=CqjMzBE96Qo8OeFvhhhItY8WhyqsKhE3DmyJLoQZNCc,2248
6
- magic_pdf/config/drop_tag.py,sha256=CjveyzhAsHm_bfXB7ZZNKruw1NR-WdKD8Hz6OhQdG0A,680
7
- magic_pdf/config/enums.py,sha256=CImYuw4sbKpq9zrj6zrrEvtdoGkjxDt8S8ByNVDpypU,89
8
- magic_pdf/config/exceptions.py,sha256=2tsJxYUebVeimyYBGQkc9Nd1kIakTmWmz3SDcfJWy54,784
9
- magic_pdf/config/make_content_config.py,sha256=J2eJIhVHBPGwX18zVQomQUOxs8LcfeGLxLgdBEeRvLg,248
10
- magic_pdf/config/model_block_type.py,sha256=y5ie2ZLvo-h8OdVk8HOEha6qK0OJFtLmtOhYjrV680g,166
11
- magic_pdf/config/ocr_content_type.py,sha256=e_7RBTdShaWvWhMO2SFou7GM521elMH_Jtn5usbHWdY,890
12
- magic_pdf/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- magic_pdf/data/dataset.py,sha256=q7wfX99HTVjKCFVpf1mnYn55rK6oF5Dz8O9w4C9cYhw,11196
14
- magic_pdf/data/read_api.py,sha256=_faBnYE3iU_EiQLNFjVM6a8IQtOGAcSQNYBZsTSN1d8,5225
15
- magic_pdf/data/schemas.py,sha256=oIUTBzK8Wq8Wuy8A_uilWAbVhucRvOs9_f3lSKYgcmQ,664
16
- magic_pdf/data/utils.py,sha256=aMeQB3soGUJyoI41hfgWeOZNzPj36SOrewUM7z51AOU,2305
17
- magic_pdf/data/data_reader_writer/__init__.py,sha256=QtevUaeSivv9dQKi3Tomfn4Z0E4To0cB8qXTnglxaHc,705
18
- magic_pdf/data/data_reader_writer/base.py,sha256=nqmAcdHOXMOJO6RAT3ILligDFaw8Op0STyCw5yOzAbI,1706
19
- magic_pdf/data/data_reader_writer/filebase.py,sha256=VbNAxLyo0Io0j7iprJERt_TqxzHAtA7cUyPIaJstToU,2146
20
- magic_pdf/data/data_reader_writer/multi_bucket_s3.py,sha256=4pEJ8PPd3nX7sccHobCs0mbDM8BiqDP_sAEz7CIvpNI,5938
21
- magic_pdf/data/data_reader_writer/s3.py,sha256=9Oy1cNuXMwG1e8PgZ7AR-pn_MqHAhkgAGnyEZCYoYAA,2408
22
- magic_pdf/data/io/__init__.py,sha256=WKaIlu8i5AWYxFCGNJcorAfMnlUQDOF8CX07Ycfnu2c,294
23
- magic_pdf/data/io/base.py,sha256=SqNQqe30ZvoVvg7GVv-hLMCjN6yBgDyQQWeLgGsTfhQ,1118
24
- magic_pdf/data/io/http.py,sha256=XlKB0DNf4a_uUnfgcclvaaOtmE7lmddx0DnK8A-emAM,958
25
- magic_pdf/data/io/s3.py,sha256=hyA7sbNriQy64xd_uyJ7acN_oneQ1Pdmoc7_xcvkue8,3606
26
- magic_pdf/dict2md/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
- magic_pdf/dict2md/ocr_mkcontent.py,sha256=12WeBVxnBzzruk8CfYqqsV2dpH-mDWmE4Osl1RlRoc8,13741
28
- magic_pdf/filter/__init__.py,sha256=_7lSez_myu4b6cdzPpQ-NfREuqeBSq_QdyBPKVLyq2U,1505
29
- magic_pdf/filter/pdf_classify_by_type.py,sha256=YNYXamxYgEiSujwilCNHOtrwpgJGDiQ597qJfardDVc,42354
30
- magic_pdf/filter/pdf_meta_scan.py,sha256=eOuM0-JgaXvHolSgepGoNDJDmv_uITWLQpH_0MfnVQw,17478
31
- magic_pdf/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
- magic_pdf/integrations/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
- magic_pdf/integrations/rag/api.py,sha256=t38wvIBzLje4_JzTP3dewMLqV-tQJ-A3B92Sj2oyrfs,2507
34
- magic_pdf/integrations/rag/type.py,sha256=Z_1g_ZIOCsb7-FmZBudReIXj8nzGrgj_BygCalhJdmk,3193
35
- magic_pdf/integrations/rag/utils.py,sha256=DCb-UhC8TElb6Eq7_6NmmETreKEk5DVE18hNL8sTEBk,11762
36
- magic_pdf/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
- magic_pdf/libs/boxbase.py,sha256=DKZXhwpJd-HE2_Du7NmkeeYW5gG-iwX3GeSWL7rYGv0,16956
38
- magic_pdf/libs/clean_memory.py,sha256=OsQexCjmBO2i-Hv-0uYQfn72dbUWR8sTW81nG2zlNQQ,479
39
- magic_pdf/libs/commons.py,sha256=xD0fGA16KNB5rhbl4zRrOqdrNHYwaRablT_s9W2ZTbw,1174
40
- magic_pdf/libs/config_reader.py,sha256=9GXK7jtDyA_jxXKWZAQ69rB02v5UW4mOmo1IaOYOkW0,4374
41
- magic_pdf/libs/convert_utils.py,sha256=Ov-lsfCLBPz_15iSJXIslBNmrSf_E_1g_XDWJy8NgO8,143
42
- magic_pdf/libs/coordinate_transform.py,sha256=Bbop2cP2uz2ZG0U0gwd7J6EKkgABq5Rv03qf2LMPw80,429
43
- magic_pdf/libs/draw_bbox.py,sha256=hpUmpPiQVu7UgWQa3M49dS22G6A9gcG2jpq4dQjTjzA,18331
44
- magic_pdf/libs/hash_utils.py,sha256=VEKK9WfFoZgrPfi8kfITjLpr8Ahufs8tXh9R1Y5lAL8,404
45
- magic_pdf/libs/json_compressor.py,sha256=6-KCu0lb5ksmyqWtQGb4QqmP-FjRb5dP7P-Hevcn68g,875
46
- magic_pdf/libs/language.py,sha256=7RT3mxSa7jdpoC5ySd7ZddHA7TO7UsnmDOWiYZAxuyg,1433
47
- magic_pdf/libs/local_math.py,sha256=tqljQOgqh3fZc146HYhO88JXJaiXMVwArBkk_CSGICc,177
48
- magic_pdf/libs/markdown_utils.py,sha256=86v2BmsSV4NkoRZrH4uQD1youJhYFF3vIKr_vDeg3z0,270
49
- magic_pdf/libs/path_utils.py,sha256=Hykw_l5CU736b2egHV9P7B-qh3QNKO4nZSGCbsi0Z8E,1043
50
- magic_pdf/libs/pdf_check.py,sha256=7GWWvDR6g_rj_fE6XJlbTq5AFVX11ngRIzT0N18F214,3396
51
- magic_pdf/libs/pdf_image_tools.py,sha256=kjzSEbm7K0yiHv8kJ4VbZ9HHktM8qvAv3LhxRyDZEQk,1987
52
- magic_pdf/libs/performance_stats.py,sha256=BFi4NIsUYlanznYoTVq4hBpj4NOuShAlWBHzebBGVYM,1702
53
- magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
54
- magic_pdf/libs/version.py,sha256=Mlm4Gvmb_6yQxwUbv2Ksc-BJFXLPg9H1Vt2iV7wXrA4,22
55
- magic_pdf/model/__init__.py,sha256=sa-dO2k-TLy25I2gRrzjm_cQeYfzMf-pLwBJHkIxGo0,51
56
- magic_pdf/model/batch_analyze.py,sha256=sbrgOJWycb1Ep6e62CPi6jEyG6VSeklIxc4PmrqaLhM,11933
57
- magic_pdf/model/doc_analyze_by_custom_model.py,sha256=T0-h4QmSIDXRzgF5uWO4jQrwIot221l26PXU52xeKiA,7933
58
- magic_pdf/model/magic_model.py,sha256=yZKWo_wRck_-YLyFGRiUHGar8sV1Y6458BFLbyBAt74,30682
59
- magic_pdf/model/model_list.py,sha256=aqfEJlEfbib3D3ISrxc0Coh6SbffYh8Yq2FlQN35_zA,213
60
- magic_pdf/model/pdf_extract_kit.py,sha256=Rd51VNZPKRA_tUbDss-b44d84K6WDG2S87a37Ax7HUA,12224
61
- magic_pdf/model/pp_structure_v2.py,sha256=NcqFWL4nUtjl82MFak8HX_8V3i4Aw_fK4dATrIp5uGs,3840
62
- magic_pdf/model/sub_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
- magic_pdf/model/sub_modules/model_init.py,sha256=Ltwi3Nd5PdVVXRF9fto5nImFVg6w-twAMzOLV_F-c3g,7693
64
- magic_pdf/model/sub_modules/model_utils.py,sha256=2pI1Xcr2zCF3b64e4WoFtIbjSmTVYBE4zjyHB23gvmE,2488
65
- magic_pdf/model/sub_modules/language_detection/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
66
- magic_pdf/model/sub_modules/language_detection/utils.py,sha256=5nec_loLyYCJ5o6n38AYLz2SKmRvHDCBdt6ka84EaGM,3096
67
- magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py,sha256=bl2i7kweoJNdj47FlE9h0B_-nNQrMcW9mCLQ1puMEH8,4893
68
- magic_pdf/model/sub_modules/language_detection/yolov11/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
69
- magic_pdf/model/sub_modules/layout/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
70
- magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py,sha256=gy7rc8poO-Zr8511NJjuBV8Uryq5k3JKrstLtCONg0c,2237
71
- magic_pdf/model/sub_modules/layout/doclayout_yolo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
72
- magic_pdf/model/sub_modules/layout/layoutlmv3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
73
- magic_pdf/model/sub_modules/layout/layoutlmv3/backbone.py,sha256=1cvSCczgvwOLdvzWyqttoYPMHsXmnzI3w9abJ1bAXoM,7106
74
- magic_pdf/model/sub_modules/layout/layoutlmv3/beit.py,sha256=e-INve6bpEx_0FM5wYbQcEcelc79tzDlCljTVHaGt1w,30450
75
- magic_pdf/model/sub_modules/layout/layoutlmv3/deit.py,sha256=Qyn5UWutZ-0GJczexCh-oMMSXtav_g3ovumMFJp8Om4,17000
76
- magic_pdf/model/sub_modules/layout/layoutlmv3/model_init.py,sha256=PhWqqRwgSSmXTaUlLIjGqnBUNjzxwYDKgMzKjnxNy1k,4528
77
- magic_pdf/model/sub_modules/layout/layoutlmv3/rcnn_vl.py,sha256=nI4G6AeLRmjavNhs5S2USKh0ozn-ftMuW0F0m_eVy3c,6649
78
- magic_pdf/model/sub_modules/layout/layoutlmv3/visualizer.py,sha256=H6UYeCCbaN2gbDjGthTkKkPoyWxfE3azRjsR7fVBwnw,49797
79
- magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/__init__.py,sha256=C4N9gXJr7is7uznvQefQ7dOhlzEhdp86Lgh-7p0Y-08,186
80
- magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/__init__.py,sha256=W7V62JOh12NdMZj2H1sde3Il0AqW2VKplmHEsLle6tg,76
81
- magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/cord.py,sha256=jR_lRZxy8SeEvTK3FdlXmQHF0kefJf7ZqwM_8pvyI5E,8153
82
- magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/data_collator.py,sha256=M2TE47BprHSuQJYcoMeWOSpqkr_nh8VK6t2l26XWmxg,6279
83
- magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/funsd.py,sha256=Ez9tMeruHncJlkKQ7iRGBB9Pk1uWtgxlGeqs-sOmIG0,5214
84
- magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/image_utils.py,sha256=vuNOMzYw_h7jmaD2XUqkGlrjDEPB7XUts16GRICBmG4,10334
85
- magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/xfund.py,sha256=6jLKyc_4VhbHY4YEzBXm5RkPdsd9ldnUGXFZBLiJ-_s,8270
86
- magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/__init__.py,sha256=d5bm3Rx-jTrgfJDWrzD7t5R5CdHfug9dCNvUEneIYW4,190
87
- magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/__init__.py,sha256=a04w_C0B4P9jF-3I_tXCj3fLmfFQR5XSKGbhgGm--pM,1216
88
- magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/configuration_layoutlmv3.py,sha256=CJBcAmmLeRFVMN1YjWefoUW7hk0KXek0Eb_tergKl4Y,2150
89
- magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/modeling_layoutlmv3.py,sha256=mdo8tO-DrJcv0Lbk9Pp98n3NQXYOnFFyXQWjU7t35kA,54633
90
- magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py,sha256=diKlrfxYjKAmYrUgjYdx-FXLh-swShC3tl-EBX1b3oI,1197
91
- magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py,sha256=0lxiG69_fGpSSBYA9CBLnDa_qqa1rInZ0pJpqBwZ0Yw,1372
92
- magic_pdf/model/sub_modules/mfd/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
93
- magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py,sha256=QfHbMr1br0pOJUu1NJEMgA6yw11G0yFImJv_AfW48_c,1008
94
- magic_pdf/model/sub_modules/mfd/yolov8/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
95
- magic_pdf/model/sub_modules/mfr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
96
- magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py,sha256=HPNetRfQeHoHfRTzFEaIjLSHfjrxRvS-EaApMUebZuQ,8020
97
- magic_pdf/model/sub_modules/mfr/unimernet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
98
- magic_pdf/model/sub_modules/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
99
- magic_pdf/model/sub_modules/ocr/paddleocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
100
- magic_pdf/model/sub_modules/ocr/paddleocr/ocr_utils.py,sha256=biuLnQWUquZkxmObjpg33iVCPPJKbRA4kx0Uo6OvGyc,12672
101
- magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_273_mod.py,sha256=QBBeFN1iF7nj5gqQ5sQXjhpwy8lB4c96gubnRDBuDNU,8424
102
- magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_291_mod.py,sha256=VouMTvi6M5TV6pQdlpusgfyZapxiZ_Wi7Ff53eMC3rE,8996
103
- magic_pdf/model/sub_modules/reading_oreder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
104
- magic_pdf/model/sub_modules/reading_oreder/layoutreader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
- magic_pdf/model/sub_modules/reading_oreder/layoutreader/helpers.py,sha256=IVUFcNMDF3-kio-BIxjppHnWS3eHPqvvNihIw2fbIFM,4372
106
- magic_pdf/model/sub_modules/reading_oreder/layoutreader/xycut.py,sha256=ezNSq_Y4UXiztB58hbXJsjTJlOBqWIjuW5A2uLSaZSo,7349
107
- magic_pdf/model/sub_modules/table/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
108
- magic_pdf/model/sub_modules/table/table_utils.py,sha256=B9BC4f5EEjlt2ldYxrIC8Wic2Tz3t3gTJeEyK3ggrOU,282
109
- magic_pdf/model/sub_modules/table/rapidtable/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
110
- magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py,sha256=6TUO6wiA4oZQB2_VP6kngZF6-2cI6mAP57Qf2lv6LVw,2922
111
- magic_pdf/model/sub_modules/table/structeqtable/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
112
- magic_pdf/model/sub_modules/table/structeqtable/struct_eqtable.py,sha256=SrNPm-uOFEvN5muFGbXTAuwzXm-rCiaihVdqbydIBIA,1131
113
- magic_pdf/model/sub_modules/table/tablemaster/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
- magic_pdf/model/sub_modules/table/tablemaster/tablemaster_paddle.py,sha256=QEQ-56AzoIAU7UWsEidWW_KDOY5r16qm2kSpox8cxq4,2755
115
- magic_pdf/operators/__init__.py,sha256=liU2-WYUvsQ1G4PYBppyvokS9z5IjrnlVMtoBAC1REI,2630
116
- magic_pdf/operators/models.py,sha256=mRqbCVrxxaUVDpEBAsXaK7EL1M-goICkE1W0FYgewio,5305
117
- magic_pdf/operators/pipes.py,sha256=XgBgisKQd_ruW-3Tw4v5LhqloZUHgn2aFcpi_q8LbCs,6767
118
- magic_pdf/post_proc/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
119
- magic_pdf/post_proc/llm_aided.py,sha256=yzhu2cCpUZjdwf3v0swYDgSs9VWIfMAoXepYIP1EMZs,6367
120
- magic_pdf/post_proc/para_split_v3.py,sha256=v4SdQn4OZdHRXpWQMfQ-FGJz_tglQ88uFUqpwY542Fo,16922
121
- magic_pdf/pre_proc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
122
- magic_pdf/pre_proc/construct_page_dict.py,sha256=OFmq5XRKi6fYIo-lmGlL-NB16Sf0egzsfEx-fT2uYrc,660
123
- magic_pdf/pre_proc/cut_image.py,sha256=NDzbxwD7z7Tb4uAxL4KR6LzURFdN1Tzr4nPvj-VmEqc,1225
124
- magic_pdf/pre_proc/ocr_detect_all_bboxes.py,sha256=nt88ttXCEI_1ihAF7HU15SQjwM69V-iJmk-L_nyzA6o,9328
125
- magic_pdf/pre_proc/ocr_dict_merge.py,sha256=4Z3aHZ9sxzijkVpOCENslvUcpp7DXgNID4Gl3pxwIg4,5512
126
- magic_pdf/pre_proc/ocr_span_list_modify.py,sha256=xrgC9vR0poklZuY4Og41pZVdXzuaGFg3BnQ01X60dpo,3102
127
- magic_pdf/pre_proc/remove_bbox_overlap.py,sha256=mcdxAh4P56NZ3Ij8h3vW8qC_SrszfXflVWuWUuUiTNg,3089
128
- magic_pdf/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
129
- magic_pdf/resources/model_config/model_configs.yaml,sha256=v3HwFTmIbXJJEBXUHHHMnZQKRo6ZQtP3cncSebh-5gc,322
130
- magic_pdf/resources/model_config/UniMERNet/demo.yaml,sha256=Jdaim2D2lAYrV9rhc1X5Sy2_IacGOrfysJhxEUgSElo,827
131
- magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml,sha256=9aNAEYgpHTAWpcUrDvuPG2y4V-Qw8QdcJefi96y8yDU,6109
132
- magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt,sha256=dV4lcudF8wimEbAooYbvISvFhrXjp9i0rMRqv9VW6hY,3204667
133
- magic_pdf/spark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
134
- magic_pdf/spark/spark_api.py,sha256=BYO6zlRW0cEnIUB3ZzNQTu_LsPHEVitqiUN7gy3x_wo,1124
135
- magic_pdf/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
136
- magic_pdf/tools/cli.py,sha256=YiX9LU4UeU3yYIpblGO1cbO95Tbo3A8cmWFK_1WvqfU,4134
137
- magic_pdf/tools/cli_dev.py,sha256=3RbubfTIagWoFYdu8wSDanr-BJDjFGeDet55jTy7He0,3948
138
- magic_pdf/tools/common.py,sha256=1LfMeXBBsb3WlGeNAze_pPOYXQ8Qbfh-JgRXweojHKo,8381
139
- magic_pdf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
140
- magic_pdf/utils/annotations.py,sha256=82ou3uELNbQWa9hOFFkVt0gsIskAKf5msCv5J2IJ5V0,211
141
- magic_pdf/utils/office_to_pdf.py,sha256=7aj-Ls2v8saD-Rgu_t3FIc-J3Ka9wnmiEH5zY-H1Vxs,729
142
- magic_pdf-1.2.1.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
143
- magic_pdf-1.2.1.dist-info/METADATA,sha256=oMxODNFW4g154Rrh9g2sE9_irmB6x7j_5KmF7dRYEPQ,40994
144
- magic_pdf-1.2.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
145
- magic_pdf-1.2.1.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
146
- magic_pdf-1.2.1.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
147
- magic_pdf-1.2.1.dist-info/RECORD,,