magic-pdf 1.2.2__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- magic_pdf/data/batch_build_dataset.py +156 -0
- magic_pdf/data/dataset.py +56 -25
- magic_pdf/data/utils.py +108 -9
- magic_pdf/dict2md/ocr_mkcontent.py +4 -3
- magic_pdf/libs/pdf_image_tools.py +11 -6
- magic_pdf/libs/performance_stats.py +12 -1
- magic_pdf/libs/version.py +1 -1
- magic_pdf/model/batch_analyze.py +175 -201
- magic_pdf/model/doc_analyze_by_custom_model.py +142 -92
- magic_pdf/model/pdf_extract_kit.py +5 -38
- magic_pdf/model/sub_modules/language_detection/utils.py +2 -4
- magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py +24 -19
- magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py +3 -1
- magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py +3 -1
- magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py +31 -102
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/__init__.py +13 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/modeling_unimernet.py +189 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py +8 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py +163 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py +2351 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py +9 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py +132 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py +132 -0
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py +1084 -0
- magic_pdf/model/sub_modules/model_init.py +50 -37
- magic_pdf/model/sub_modules/model_utils.py +18 -12
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/__init__.py +1 -0
- magic_pdf/model/sub_modules/ocr/{paddleocr → paddleocr2pytorch}/ocr_utils.py +102 -97
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py +193 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py +39 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py +8 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py +48 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py +418 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py +25 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py +105 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py +62 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py +269 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py +290 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py +516 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py +136 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py +234 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py +638 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py +76 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py +43 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py +23 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py +109 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py +54 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py +58 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py +29 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py +456 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py +117 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py +228 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py +33 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py +20 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py +179 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py +690 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py +0 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml +383 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt +162 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt +8421 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt +163 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/devanagari_dict.txt +167 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt +95 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/japan_dict.txt +4399 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ka_dict.txt +153 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/korean_dict.txt +3688 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt +185 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt +6623 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt +128 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt +151 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml +49 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/__init__.py +1 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/__init__.py +1 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_cls.py +106 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_det.py +217 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_rec.py +440 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_system.py +104 -0
- magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/pytorchocr_utility.py +227 -0
- magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py +15 -19
- magic_pdf/pdf_parse_union_core_v2.py +112 -74
- magic_pdf/pre_proc/ocr_dict_merge.py +9 -1
- magic_pdf/pre_proc/ocr_span_list_modify.py +51 -0
- magic_pdf/resources/model_config/model_configs.yaml +1 -1
- magic_pdf/resources/slanet_plus/slanet-plus.onnx +0 -0
- magic_pdf/tools/cli.py +30 -12
- magic_pdf/tools/common.py +90 -12
- {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.1.dist-info}/METADATA +92 -59
- magic_pdf-1.3.1.dist-info/RECORD +203 -0
- {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.1.dist-info}/WHEEL +1 -1
- magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_273_mod.py +0 -204
- magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_291_mod.py +0 -213
- magic_pdf/model/sub_modules/table/structeqtable/struct_eqtable.py +0 -37
- magic_pdf/model/sub_modules/table/tablemaster/tablemaster_paddle.py +0 -71
- magic_pdf/resources/model_config/UniMERNet/demo.yaml +0 -46
- magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml +0 -351
- magic_pdf-1.2.2.dist-info/RECORD +0 -147
- /magic_pdf/model/sub_modules/{ocr/paddleocr/__init__.py → mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py} +0 -0
- /magic_pdf/model/sub_modules/{table/structeqtable → ocr/paddleocr2pytorch/pytorchocr}/__init__.py +0 -0
- /magic_pdf/model/sub_modules/{table/tablemaster → ocr/paddleocr2pytorch/pytorchocr/modeling}/__init__.py +0 -0
- {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.1.dist-info}/LICENSE.md +0 -0
- {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.1.dist-info}/entry_points.txt +0 -0
- {magic_pdf-1.2.2.dist-info → magic_pdf-1.3.1.dist-info}/top_level.txt +0 -0
@@ -1,37 +0,0 @@
|
|
1
|
-
import torch
|
2
|
-
from struct_eqtable import build_model
|
3
|
-
|
4
|
-
from magic_pdf.model.sub_modules.table.table_utils import minify_html
|
5
|
-
|
6
|
-
|
7
|
-
class StructTableModel:
|
8
|
-
def __init__(self, model_path, max_new_tokens=1024, max_time=60):
|
9
|
-
# init
|
10
|
-
assert torch.cuda.is_available(), "CUDA must be available for StructEqTable model."
|
11
|
-
self.model = build_model(
|
12
|
-
model_ckpt=model_path,
|
13
|
-
max_new_tokens=max_new_tokens,
|
14
|
-
max_time=max_time,
|
15
|
-
lmdeploy=False,
|
16
|
-
flash_attn=False,
|
17
|
-
batch_size=1,
|
18
|
-
).cuda()
|
19
|
-
self.default_format = "html"
|
20
|
-
|
21
|
-
def predict(self, images, output_format=None, **kwargs):
|
22
|
-
|
23
|
-
if output_format is None:
|
24
|
-
output_format = self.default_format
|
25
|
-
else:
|
26
|
-
if output_format not in ['latex', 'markdown', 'html']:
|
27
|
-
raise ValueError(f"Output format {output_format} is not supported.")
|
28
|
-
|
29
|
-
results = self.model(
|
30
|
-
images, output_format=output_format
|
31
|
-
)
|
32
|
-
|
33
|
-
if output_format == "html":
|
34
|
-
results = [minify_html(html) for html in results]
|
35
|
-
|
36
|
-
return results
|
37
|
-
|
@@ -1,71 +0,0 @@
|
|
1
|
-
import os
|
2
|
-
|
3
|
-
import cv2
|
4
|
-
import numpy as np
|
5
|
-
from ppstructure.table.predict_table import TableSystem
|
6
|
-
from ppstructure.utility import init_args
|
7
|
-
from PIL import Image
|
8
|
-
|
9
|
-
from magic_pdf.config.constants import * # noqa: F403
|
10
|
-
|
11
|
-
|
12
|
-
class TableMasterPaddleModel(object):
|
13
|
-
"""This class is responsible for converting image of table into HTML format
|
14
|
-
using a pre-trained model.
|
15
|
-
|
16
|
-
Attributes:
|
17
|
-
- table_sys: An instance of TableSystem initialized with parsed arguments.
|
18
|
-
|
19
|
-
Methods:
|
20
|
-
- __init__(config): Initializes the model with configuration parameters.
|
21
|
-
- img2html(image): Converts a PIL Image or NumPy array to HTML string.
|
22
|
-
- parse_args(**kwargs): Parses configuration arguments.
|
23
|
-
"""
|
24
|
-
|
25
|
-
def __init__(self, config):
|
26
|
-
"""
|
27
|
-
Parameters:
|
28
|
-
- config (dict): Configuration dictionary containing model_dir and device.
|
29
|
-
"""
|
30
|
-
args = self.parse_args(**config)
|
31
|
-
self.table_sys = TableSystem(args)
|
32
|
-
|
33
|
-
def img2html(self, image):
|
34
|
-
"""
|
35
|
-
Parameters:
|
36
|
-
- image (PIL.Image or np.ndarray): The image of the table to be converted.
|
37
|
-
|
38
|
-
Return:
|
39
|
-
- HTML (str): A string representing the HTML structure with content of the table.
|
40
|
-
"""
|
41
|
-
if isinstance(image, Image.Image):
|
42
|
-
image = np.asarray(image)
|
43
|
-
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
|
44
|
-
pred_res, _ = self.table_sys(image)
|
45
|
-
pred_html = pred_res['html']
|
46
|
-
# res = '<td><table border="1">' + pred_html.replace("<html><body><table>", "").replace(
|
47
|
-
# "</table></body></html>","") + "</table></td>\n"
|
48
|
-
return pred_html
|
49
|
-
|
50
|
-
def parse_args(self, **kwargs):
|
51
|
-
parser = init_args()
|
52
|
-
model_dir = kwargs.get('model_dir')
|
53
|
-
table_model_dir = os.path.join(model_dir, TABLE_MASTER_DIR) # noqa: F405
|
54
|
-
table_char_dict_path = os.path.join(model_dir, TABLE_MASTER_DICT) # noqa: F405
|
55
|
-
det_model_dir = os.path.join(model_dir, DETECT_MODEL_DIR) # noqa: F405
|
56
|
-
rec_model_dir = os.path.join(model_dir, REC_MODEL_DIR) # noqa: F405
|
57
|
-
rec_char_dict_path = os.path.join(model_dir, REC_CHAR_DICT) # noqa: F405
|
58
|
-
device = kwargs.get('device', 'cpu')
|
59
|
-
use_gpu = True if device.startswith('cuda') else False
|
60
|
-
config = {
|
61
|
-
'use_gpu': use_gpu,
|
62
|
-
'table_max_len': kwargs.get('table_max_len', TABLE_MAX_LEN), # noqa: F405
|
63
|
-
'table_algorithm': 'TableMaster',
|
64
|
-
'table_model_dir': table_model_dir,
|
65
|
-
'table_char_dict_path': table_char_dict_path,
|
66
|
-
'det_model_dir': det_model_dir,
|
67
|
-
'rec_model_dir': rec_model_dir,
|
68
|
-
'rec_char_dict_path': rec_char_dict_path,
|
69
|
-
}
|
70
|
-
parser.set_defaults(**config)
|
71
|
-
return parser.parse_args([])
|
@@ -1,46 +0,0 @@
|
|
1
|
-
model:
|
2
|
-
arch: unimernet
|
3
|
-
model_type: unimernet
|
4
|
-
model_config:
|
5
|
-
model_name: ./models/unimernet_base
|
6
|
-
max_seq_len: 1536
|
7
|
-
|
8
|
-
load_pretrained: True
|
9
|
-
pretrained: './models/unimernet_base/pytorch_model.pth'
|
10
|
-
tokenizer_config:
|
11
|
-
path: ./models/unimernet_base
|
12
|
-
|
13
|
-
datasets:
|
14
|
-
formula_rec_eval:
|
15
|
-
vis_processor:
|
16
|
-
eval:
|
17
|
-
name: "formula_image_eval"
|
18
|
-
image_size:
|
19
|
-
- 192
|
20
|
-
- 672
|
21
|
-
|
22
|
-
run:
|
23
|
-
runner: runner_iter
|
24
|
-
task: unimernet_train
|
25
|
-
|
26
|
-
batch_size_train: 64
|
27
|
-
batch_size_eval: 64
|
28
|
-
num_workers: 1
|
29
|
-
|
30
|
-
iters_per_inner_epoch: 2000
|
31
|
-
max_iters: 60000
|
32
|
-
|
33
|
-
seed: 42
|
34
|
-
output_dir: "../output/demo"
|
35
|
-
|
36
|
-
evaluate: True
|
37
|
-
test_splits: [ "eval" ]
|
38
|
-
|
39
|
-
device: "cuda"
|
40
|
-
world_size: 1
|
41
|
-
dist_url: "env://"
|
42
|
-
distributed: True
|
43
|
-
distributed_type: ddp # or fsdp when train llm
|
44
|
-
|
45
|
-
generate_cfg:
|
46
|
-
temperature: 0.0
|
@@ -1,351 +0,0 @@
|
|
1
|
-
AUG:
|
2
|
-
DETR: true
|
3
|
-
CACHE_DIR: ~/cache/huggingface
|
4
|
-
CUDNN_BENCHMARK: false
|
5
|
-
DATALOADER:
|
6
|
-
ASPECT_RATIO_GROUPING: true
|
7
|
-
FILTER_EMPTY_ANNOTATIONS: false
|
8
|
-
NUM_WORKERS: 4
|
9
|
-
REPEAT_THRESHOLD: 0.0
|
10
|
-
SAMPLER_TRAIN: TrainingSampler
|
11
|
-
DATASETS:
|
12
|
-
PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
|
13
|
-
PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
|
14
|
-
PROPOSAL_FILES_TEST: []
|
15
|
-
PROPOSAL_FILES_TRAIN: []
|
16
|
-
TEST:
|
17
|
-
- scihub_train
|
18
|
-
TRAIN:
|
19
|
-
- scihub_train
|
20
|
-
GLOBAL:
|
21
|
-
HACK: 1.0
|
22
|
-
ICDAR_DATA_DIR_TEST: ''
|
23
|
-
ICDAR_DATA_DIR_TRAIN: ''
|
24
|
-
INPUT:
|
25
|
-
CROP:
|
26
|
-
ENABLED: true
|
27
|
-
SIZE:
|
28
|
-
- 384
|
29
|
-
- 600
|
30
|
-
TYPE: absolute_range
|
31
|
-
FORMAT: RGB
|
32
|
-
MASK_FORMAT: polygon
|
33
|
-
MAX_SIZE_TEST: 1333
|
34
|
-
MAX_SIZE_TRAIN: 1333
|
35
|
-
MIN_SIZE_TEST: 800
|
36
|
-
MIN_SIZE_TRAIN:
|
37
|
-
- 480
|
38
|
-
- 512
|
39
|
-
- 544
|
40
|
-
- 576
|
41
|
-
- 608
|
42
|
-
- 640
|
43
|
-
- 672
|
44
|
-
- 704
|
45
|
-
- 736
|
46
|
-
- 768
|
47
|
-
- 800
|
48
|
-
MIN_SIZE_TRAIN_SAMPLING: choice
|
49
|
-
RANDOM_FLIP: horizontal
|
50
|
-
MODEL:
|
51
|
-
ANCHOR_GENERATOR:
|
52
|
-
ANGLES:
|
53
|
-
- - -90
|
54
|
-
- 0
|
55
|
-
- 90
|
56
|
-
ASPECT_RATIOS:
|
57
|
-
- - 0.5
|
58
|
-
- 1.0
|
59
|
-
- 2.0
|
60
|
-
NAME: DefaultAnchorGenerator
|
61
|
-
OFFSET: 0.0
|
62
|
-
SIZES:
|
63
|
-
- - 32
|
64
|
-
- - 64
|
65
|
-
- - 128
|
66
|
-
- - 256
|
67
|
-
- - 512
|
68
|
-
BACKBONE:
|
69
|
-
FREEZE_AT: 2
|
70
|
-
NAME: build_vit_fpn_backbone
|
71
|
-
CONFIG_PATH: ''
|
72
|
-
DEVICE: cuda
|
73
|
-
FPN:
|
74
|
-
FUSE_TYPE: sum
|
75
|
-
IN_FEATURES:
|
76
|
-
- layer3
|
77
|
-
- layer5
|
78
|
-
- layer7
|
79
|
-
- layer11
|
80
|
-
NORM: ''
|
81
|
-
OUT_CHANNELS: 256
|
82
|
-
IMAGE_ONLY: true
|
83
|
-
KEYPOINT_ON: false
|
84
|
-
LOAD_PROPOSALS: false
|
85
|
-
MASK_ON: true
|
86
|
-
META_ARCHITECTURE: VLGeneralizedRCNN
|
87
|
-
PANOPTIC_FPN:
|
88
|
-
COMBINE:
|
89
|
-
ENABLED: true
|
90
|
-
INSTANCES_CONFIDENCE_THRESH: 0.5
|
91
|
-
OVERLAP_THRESH: 0.5
|
92
|
-
STUFF_AREA_LIMIT: 4096
|
93
|
-
INSTANCE_LOSS_WEIGHT: 1.0
|
94
|
-
PIXEL_MEAN:
|
95
|
-
- 127.5
|
96
|
-
- 127.5
|
97
|
-
- 127.5
|
98
|
-
PIXEL_STD:
|
99
|
-
- 127.5
|
100
|
-
- 127.5
|
101
|
-
- 127.5
|
102
|
-
PROPOSAL_GENERATOR:
|
103
|
-
MIN_SIZE: 0
|
104
|
-
NAME: RPN
|
105
|
-
RESNETS:
|
106
|
-
DEFORM_MODULATED: false
|
107
|
-
DEFORM_NUM_GROUPS: 1
|
108
|
-
DEFORM_ON_PER_STAGE:
|
109
|
-
- false
|
110
|
-
- false
|
111
|
-
- false
|
112
|
-
- false
|
113
|
-
DEPTH: 50
|
114
|
-
NORM: FrozenBN
|
115
|
-
NUM_GROUPS: 1
|
116
|
-
OUT_FEATURES:
|
117
|
-
- res4
|
118
|
-
RES2_OUT_CHANNELS: 256
|
119
|
-
RES5_DILATION: 1
|
120
|
-
STEM_OUT_CHANNELS: 64
|
121
|
-
STRIDE_IN_1X1: true
|
122
|
-
WIDTH_PER_GROUP: 64
|
123
|
-
RETINANET:
|
124
|
-
BBOX_REG_LOSS_TYPE: smooth_l1
|
125
|
-
BBOX_REG_WEIGHTS:
|
126
|
-
- 1.0
|
127
|
-
- 1.0
|
128
|
-
- 1.0
|
129
|
-
- 1.0
|
130
|
-
FOCAL_LOSS_ALPHA: 0.25
|
131
|
-
FOCAL_LOSS_GAMMA: 2.0
|
132
|
-
IN_FEATURES:
|
133
|
-
- p3
|
134
|
-
- p4
|
135
|
-
- p5
|
136
|
-
- p6
|
137
|
-
- p7
|
138
|
-
IOU_LABELS:
|
139
|
-
- 0
|
140
|
-
- -1
|
141
|
-
- 1
|
142
|
-
IOU_THRESHOLDS:
|
143
|
-
- 0.4
|
144
|
-
- 0.5
|
145
|
-
NMS_THRESH_TEST: 0.5
|
146
|
-
NORM: ''
|
147
|
-
NUM_CLASSES: 10
|
148
|
-
NUM_CONVS: 4
|
149
|
-
PRIOR_PROB: 0.01
|
150
|
-
SCORE_THRESH_TEST: 0.05
|
151
|
-
SMOOTH_L1_LOSS_BETA: 0.1
|
152
|
-
TOPK_CANDIDATES_TEST: 1000
|
153
|
-
ROI_BOX_CASCADE_HEAD:
|
154
|
-
BBOX_REG_WEIGHTS:
|
155
|
-
- - 10.0
|
156
|
-
- 10.0
|
157
|
-
- 5.0
|
158
|
-
- 5.0
|
159
|
-
- - 20.0
|
160
|
-
- 20.0
|
161
|
-
- 10.0
|
162
|
-
- 10.0
|
163
|
-
- - 30.0
|
164
|
-
- 30.0
|
165
|
-
- 15.0
|
166
|
-
- 15.0
|
167
|
-
IOUS:
|
168
|
-
- 0.5
|
169
|
-
- 0.6
|
170
|
-
- 0.7
|
171
|
-
ROI_BOX_HEAD:
|
172
|
-
BBOX_REG_LOSS_TYPE: smooth_l1
|
173
|
-
BBOX_REG_LOSS_WEIGHT: 1.0
|
174
|
-
BBOX_REG_WEIGHTS:
|
175
|
-
- 10.0
|
176
|
-
- 10.0
|
177
|
-
- 5.0
|
178
|
-
- 5.0
|
179
|
-
CLS_AGNOSTIC_BBOX_REG: true
|
180
|
-
CONV_DIM: 256
|
181
|
-
FC_DIM: 1024
|
182
|
-
NAME: FastRCNNConvFCHead
|
183
|
-
NORM: ''
|
184
|
-
NUM_CONV: 0
|
185
|
-
NUM_FC: 2
|
186
|
-
POOLER_RESOLUTION: 7
|
187
|
-
POOLER_SAMPLING_RATIO: 0
|
188
|
-
POOLER_TYPE: ROIAlignV2
|
189
|
-
SMOOTH_L1_BETA: 0.0
|
190
|
-
TRAIN_ON_PRED_BOXES: false
|
191
|
-
ROI_HEADS:
|
192
|
-
BATCH_SIZE_PER_IMAGE: 512
|
193
|
-
IN_FEATURES:
|
194
|
-
- p2
|
195
|
-
- p3
|
196
|
-
- p4
|
197
|
-
- p5
|
198
|
-
IOU_LABELS:
|
199
|
-
- 0
|
200
|
-
- 1
|
201
|
-
IOU_THRESHOLDS:
|
202
|
-
- 0.5
|
203
|
-
NAME: CascadeROIHeads
|
204
|
-
NMS_THRESH_TEST: 0.5
|
205
|
-
NUM_CLASSES: 10
|
206
|
-
POSITIVE_FRACTION: 0.25
|
207
|
-
PROPOSAL_APPEND_GT: true
|
208
|
-
SCORE_THRESH_TEST: 0.05
|
209
|
-
ROI_KEYPOINT_HEAD:
|
210
|
-
CONV_DIMS:
|
211
|
-
- 512
|
212
|
-
- 512
|
213
|
-
- 512
|
214
|
-
- 512
|
215
|
-
- 512
|
216
|
-
- 512
|
217
|
-
- 512
|
218
|
-
- 512
|
219
|
-
LOSS_WEIGHT: 1.0
|
220
|
-
MIN_KEYPOINTS_PER_IMAGE: 1
|
221
|
-
NAME: KRCNNConvDeconvUpsampleHead
|
222
|
-
NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
|
223
|
-
NUM_KEYPOINTS: 17
|
224
|
-
POOLER_RESOLUTION: 14
|
225
|
-
POOLER_SAMPLING_RATIO: 0
|
226
|
-
POOLER_TYPE: ROIAlignV2
|
227
|
-
ROI_MASK_HEAD:
|
228
|
-
CLS_AGNOSTIC_MASK: false
|
229
|
-
CONV_DIM: 256
|
230
|
-
NAME: MaskRCNNConvUpsampleHead
|
231
|
-
NORM: ''
|
232
|
-
NUM_CONV: 4
|
233
|
-
POOLER_RESOLUTION: 14
|
234
|
-
POOLER_SAMPLING_RATIO: 0
|
235
|
-
POOLER_TYPE: ROIAlignV2
|
236
|
-
RPN:
|
237
|
-
BATCH_SIZE_PER_IMAGE: 256
|
238
|
-
BBOX_REG_LOSS_TYPE: smooth_l1
|
239
|
-
BBOX_REG_LOSS_WEIGHT: 1.0
|
240
|
-
BBOX_REG_WEIGHTS:
|
241
|
-
- 1.0
|
242
|
-
- 1.0
|
243
|
-
- 1.0
|
244
|
-
- 1.0
|
245
|
-
BOUNDARY_THRESH: -1
|
246
|
-
CONV_DIMS:
|
247
|
-
- -1
|
248
|
-
HEAD_NAME: StandardRPNHead
|
249
|
-
IN_FEATURES:
|
250
|
-
- p2
|
251
|
-
- p3
|
252
|
-
- p4
|
253
|
-
- p5
|
254
|
-
- p6
|
255
|
-
IOU_LABELS:
|
256
|
-
- 0
|
257
|
-
- -1
|
258
|
-
- 1
|
259
|
-
IOU_THRESHOLDS:
|
260
|
-
- 0.3
|
261
|
-
- 0.7
|
262
|
-
LOSS_WEIGHT: 1.0
|
263
|
-
NMS_THRESH: 0.7
|
264
|
-
POSITIVE_FRACTION: 0.5
|
265
|
-
POST_NMS_TOPK_TEST: 1000
|
266
|
-
POST_NMS_TOPK_TRAIN: 2000
|
267
|
-
PRE_NMS_TOPK_TEST: 1000
|
268
|
-
PRE_NMS_TOPK_TRAIN: 2000
|
269
|
-
SMOOTH_L1_BETA: 0.0
|
270
|
-
SEM_SEG_HEAD:
|
271
|
-
COMMON_STRIDE: 4
|
272
|
-
CONVS_DIM: 128
|
273
|
-
IGNORE_VALUE: 255
|
274
|
-
IN_FEATURES:
|
275
|
-
- p2
|
276
|
-
- p3
|
277
|
-
- p4
|
278
|
-
- p5
|
279
|
-
LOSS_WEIGHT: 1.0
|
280
|
-
NAME: SemSegFPNHead
|
281
|
-
NORM: GN
|
282
|
-
NUM_CLASSES: 10
|
283
|
-
VIT:
|
284
|
-
DROP_PATH: 0.1
|
285
|
-
IMG_SIZE:
|
286
|
-
- 224
|
287
|
-
- 224
|
288
|
-
NAME: layoutlmv3_base
|
289
|
-
OUT_FEATURES:
|
290
|
-
- layer3
|
291
|
-
- layer5
|
292
|
-
- layer7
|
293
|
-
- layer11
|
294
|
-
POS_TYPE: abs
|
295
|
-
WEIGHTS:
|
296
|
-
OUTPUT_DIR:
|
297
|
-
SCIHUB_DATA_DIR_TRAIN: ~/publaynet/layout_scihub/train
|
298
|
-
SEED: 42
|
299
|
-
SOLVER:
|
300
|
-
AMP:
|
301
|
-
ENABLED: true
|
302
|
-
BACKBONE_MULTIPLIER: 1.0
|
303
|
-
BASE_LR: 0.0002
|
304
|
-
BIAS_LR_FACTOR: 1.0
|
305
|
-
CHECKPOINT_PERIOD: 2000
|
306
|
-
CLIP_GRADIENTS:
|
307
|
-
CLIP_TYPE: full_model
|
308
|
-
CLIP_VALUE: 1.0
|
309
|
-
ENABLED: true
|
310
|
-
NORM_TYPE: 2.0
|
311
|
-
GAMMA: 0.1
|
312
|
-
GRADIENT_ACCUMULATION_STEPS: 1
|
313
|
-
IMS_PER_BATCH: 32
|
314
|
-
LR_SCHEDULER_NAME: WarmupCosineLR
|
315
|
-
MAX_ITER: 20000
|
316
|
-
MOMENTUM: 0.9
|
317
|
-
NESTEROV: false
|
318
|
-
OPTIMIZER: ADAMW
|
319
|
-
REFERENCE_WORLD_SIZE: 0
|
320
|
-
STEPS:
|
321
|
-
- 10000
|
322
|
-
WARMUP_FACTOR: 0.01
|
323
|
-
WARMUP_ITERS: 333
|
324
|
-
WARMUP_METHOD: linear
|
325
|
-
WEIGHT_DECAY: 0.05
|
326
|
-
WEIGHT_DECAY_BIAS: null
|
327
|
-
WEIGHT_DECAY_NORM: 0.0
|
328
|
-
TEST:
|
329
|
-
AUG:
|
330
|
-
ENABLED: false
|
331
|
-
FLIP: true
|
332
|
-
MAX_SIZE: 4000
|
333
|
-
MIN_SIZES:
|
334
|
-
- 400
|
335
|
-
- 500
|
336
|
-
- 600
|
337
|
-
- 700
|
338
|
-
- 800
|
339
|
-
- 900
|
340
|
-
- 1000
|
341
|
-
- 1100
|
342
|
-
- 1200
|
343
|
-
DETECTIONS_PER_IMAGE: 100
|
344
|
-
EVAL_PERIOD: 1000
|
345
|
-
EXPECTED_RESULTS: []
|
346
|
-
KEYPOINT_OKS_SIGMAS: []
|
347
|
-
PRECISE_BN:
|
348
|
-
ENABLED: false
|
349
|
-
NUM_ITER: 200
|
350
|
-
VERSION: 2
|
351
|
-
VIS_PERIOD: 0
|
magic_pdf-1.2.2.dist-info/RECORD
DELETED
@@ -1,147 +0,0 @@
|
|
1
|
-
magic_pdf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
magic_pdf/pdf_parse_union_core_v2.py,sha256=Pt3UtPQgOrF2YudQqrwVVC767_271E-LRg2aUsiggXg,38435
|
3
|
-
magic_pdf/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
magic_pdf/config/constants.py,sha256=fXGzANULnJWLPxwYp3BEFWx-rnorzpySMx63ffyEyq4,1272
|
5
|
-
magic_pdf/config/drop_reason.py,sha256=CqjMzBE96Qo8OeFvhhhItY8WhyqsKhE3DmyJLoQZNCc,2248
|
6
|
-
magic_pdf/config/drop_tag.py,sha256=CjveyzhAsHm_bfXB7ZZNKruw1NR-WdKD8Hz6OhQdG0A,680
|
7
|
-
magic_pdf/config/enums.py,sha256=CImYuw4sbKpq9zrj6zrrEvtdoGkjxDt8S8ByNVDpypU,89
|
8
|
-
magic_pdf/config/exceptions.py,sha256=2tsJxYUebVeimyYBGQkc9Nd1kIakTmWmz3SDcfJWy54,784
|
9
|
-
magic_pdf/config/make_content_config.py,sha256=J2eJIhVHBPGwX18zVQomQUOxs8LcfeGLxLgdBEeRvLg,248
|
10
|
-
magic_pdf/config/model_block_type.py,sha256=y5ie2ZLvo-h8OdVk8HOEha6qK0OJFtLmtOhYjrV680g,166
|
11
|
-
magic_pdf/config/ocr_content_type.py,sha256=e_7RBTdShaWvWhMO2SFou7GM521elMH_Jtn5usbHWdY,890
|
12
|
-
magic_pdf/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
-
magic_pdf/data/dataset.py,sha256=q7wfX99HTVjKCFVpf1mnYn55rK6oF5Dz8O9w4C9cYhw,11196
|
14
|
-
magic_pdf/data/read_api.py,sha256=_faBnYE3iU_EiQLNFjVM6a8IQtOGAcSQNYBZsTSN1d8,5225
|
15
|
-
magic_pdf/data/schemas.py,sha256=oIUTBzK8Wq8Wuy8A_uilWAbVhucRvOs9_f3lSKYgcmQ,664
|
16
|
-
magic_pdf/data/utils.py,sha256=aMeQB3soGUJyoI41hfgWeOZNzPj36SOrewUM7z51AOU,2305
|
17
|
-
magic_pdf/data/data_reader_writer/__init__.py,sha256=QtevUaeSivv9dQKi3Tomfn4Z0E4To0cB8qXTnglxaHc,705
|
18
|
-
magic_pdf/data/data_reader_writer/base.py,sha256=nqmAcdHOXMOJO6RAT3ILligDFaw8Op0STyCw5yOzAbI,1706
|
19
|
-
magic_pdf/data/data_reader_writer/filebase.py,sha256=VbNAxLyo0Io0j7iprJERt_TqxzHAtA7cUyPIaJstToU,2146
|
20
|
-
magic_pdf/data/data_reader_writer/multi_bucket_s3.py,sha256=4pEJ8PPd3nX7sccHobCs0mbDM8BiqDP_sAEz7CIvpNI,5938
|
21
|
-
magic_pdf/data/data_reader_writer/s3.py,sha256=9Oy1cNuXMwG1e8PgZ7AR-pn_MqHAhkgAGnyEZCYoYAA,2408
|
22
|
-
magic_pdf/data/io/__init__.py,sha256=WKaIlu8i5AWYxFCGNJcorAfMnlUQDOF8CX07Ycfnu2c,294
|
23
|
-
magic_pdf/data/io/base.py,sha256=SqNQqe30ZvoVvg7GVv-hLMCjN6yBgDyQQWeLgGsTfhQ,1118
|
24
|
-
magic_pdf/data/io/http.py,sha256=XlKB0DNf4a_uUnfgcclvaaOtmE7lmddx0DnK8A-emAM,958
|
25
|
-
magic_pdf/data/io/s3.py,sha256=hyA7sbNriQy64xd_uyJ7acN_oneQ1Pdmoc7_xcvkue8,3606
|
26
|
-
magic_pdf/dict2md/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
27
|
-
magic_pdf/dict2md/ocr_mkcontent.py,sha256=12WeBVxnBzzruk8CfYqqsV2dpH-mDWmE4Osl1RlRoc8,13741
|
28
|
-
magic_pdf/filter/__init__.py,sha256=_7lSez_myu4b6cdzPpQ-NfREuqeBSq_QdyBPKVLyq2U,1505
|
29
|
-
magic_pdf/filter/pdf_classify_by_type.py,sha256=YNYXamxYgEiSujwilCNHOtrwpgJGDiQ597qJfardDVc,42354
|
30
|
-
magic_pdf/filter/pdf_meta_scan.py,sha256=eOuM0-JgaXvHolSgepGoNDJDmv_uITWLQpH_0MfnVQw,17478
|
31
|
-
magic_pdf/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
32
|
-
magic_pdf/integrations/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
33
|
-
magic_pdf/integrations/rag/api.py,sha256=t38wvIBzLje4_JzTP3dewMLqV-tQJ-A3B92Sj2oyrfs,2507
|
34
|
-
magic_pdf/integrations/rag/type.py,sha256=Z_1g_ZIOCsb7-FmZBudReIXj8nzGrgj_BygCalhJdmk,3193
|
35
|
-
magic_pdf/integrations/rag/utils.py,sha256=DCb-UhC8TElb6Eq7_6NmmETreKEk5DVE18hNL8sTEBk,11762
|
36
|
-
magic_pdf/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
37
|
-
magic_pdf/libs/boxbase.py,sha256=DKZXhwpJd-HE2_Du7NmkeeYW5gG-iwX3GeSWL7rYGv0,16956
|
38
|
-
magic_pdf/libs/clean_memory.py,sha256=OsQexCjmBO2i-Hv-0uYQfn72dbUWR8sTW81nG2zlNQQ,479
|
39
|
-
magic_pdf/libs/commons.py,sha256=xD0fGA16KNB5rhbl4zRrOqdrNHYwaRablT_s9W2ZTbw,1174
|
40
|
-
magic_pdf/libs/config_reader.py,sha256=9GXK7jtDyA_jxXKWZAQ69rB02v5UW4mOmo1IaOYOkW0,4374
|
41
|
-
magic_pdf/libs/convert_utils.py,sha256=Ov-lsfCLBPz_15iSJXIslBNmrSf_E_1g_XDWJy8NgO8,143
|
42
|
-
magic_pdf/libs/coordinate_transform.py,sha256=Bbop2cP2uz2ZG0U0gwd7J6EKkgABq5Rv03qf2LMPw80,429
|
43
|
-
magic_pdf/libs/draw_bbox.py,sha256=hpUmpPiQVu7UgWQa3M49dS22G6A9gcG2jpq4dQjTjzA,18331
|
44
|
-
magic_pdf/libs/hash_utils.py,sha256=VEKK9WfFoZgrPfi8kfITjLpr8Ahufs8tXh9R1Y5lAL8,404
|
45
|
-
magic_pdf/libs/json_compressor.py,sha256=6-KCu0lb5ksmyqWtQGb4QqmP-FjRb5dP7P-Hevcn68g,875
|
46
|
-
magic_pdf/libs/language.py,sha256=7RT3mxSa7jdpoC5ySd7ZddHA7TO7UsnmDOWiYZAxuyg,1433
|
47
|
-
magic_pdf/libs/local_math.py,sha256=tqljQOgqh3fZc146HYhO88JXJaiXMVwArBkk_CSGICc,177
|
48
|
-
magic_pdf/libs/markdown_utils.py,sha256=86v2BmsSV4NkoRZrH4uQD1youJhYFF3vIKr_vDeg3z0,270
|
49
|
-
magic_pdf/libs/path_utils.py,sha256=Hykw_l5CU736b2egHV9P7B-qh3QNKO4nZSGCbsi0Z8E,1043
|
50
|
-
magic_pdf/libs/pdf_check.py,sha256=7GWWvDR6g_rj_fE6XJlbTq5AFVX11ngRIzT0N18F214,3396
|
51
|
-
magic_pdf/libs/pdf_image_tools.py,sha256=kjzSEbm7K0yiHv8kJ4VbZ9HHktM8qvAv3LhxRyDZEQk,1987
|
52
|
-
magic_pdf/libs/performance_stats.py,sha256=BFi4NIsUYlanznYoTVq4hBpj4NOuShAlWBHzebBGVYM,1702
|
53
|
-
magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
|
54
|
-
magic_pdf/libs/version.py,sha256=uuf4VNtTNA93fMhoAur9YafzaKJFnczY-H1SSCSuRVQ,22
|
55
|
-
magic_pdf/model/__init__.py,sha256=sa-dO2k-TLy25I2gRrzjm_cQeYfzMf-pLwBJHkIxGo0,51
|
56
|
-
magic_pdf/model/batch_analyze.py,sha256=sbrgOJWycb1Ep6e62CPi6jEyG6VSeklIxc4PmrqaLhM,11933
|
57
|
-
magic_pdf/model/doc_analyze_by_custom_model.py,sha256=T0-h4QmSIDXRzgF5uWO4jQrwIot221l26PXU52xeKiA,7933
|
58
|
-
magic_pdf/model/magic_model.py,sha256=yZKWo_wRck_-YLyFGRiUHGar8sV1Y6458BFLbyBAt74,30682
|
59
|
-
magic_pdf/model/model_list.py,sha256=aqfEJlEfbib3D3ISrxc0Coh6SbffYh8Yq2FlQN35_zA,213
|
60
|
-
magic_pdf/model/pdf_extract_kit.py,sha256=Rd51VNZPKRA_tUbDss-b44d84K6WDG2S87a37Ax7HUA,12224
|
61
|
-
magic_pdf/model/pp_structure_v2.py,sha256=NcqFWL4nUtjl82MFak8HX_8V3i4Aw_fK4dATrIp5uGs,3840
|
62
|
-
magic_pdf/model/sub_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
63
|
-
magic_pdf/model/sub_modules/model_init.py,sha256=Ltwi3Nd5PdVVXRF9fto5nImFVg6w-twAMzOLV_F-c3g,7693
|
64
|
-
magic_pdf/model/sub_modules/model_utils.py,sha256=2pI1Xcr2zCF3b64e4WoFtIbjSmTVYBE4zjyHB23gvmE,2488
|
65
|
-
magic_pdf/model/sub_modules/language_detection/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
66
|
-
magic_pdf/model/sub_modules/language_detection/utils.py,sha256=5nec_loLyYCJ5o6n38AYLz2SKmRvHDCBdt6ka84EaGM,3096
|
67
|
-
magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py,sha256=bl2i7kweoJNdj47FlE9h0B_-nNQrMcW9mCLQ1puMEH8,4893
|
68
|
-
magic_pdf/model/sub_modules/language_detection/yolov11/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
69
|
-
magic_pdf/model/sub_modules/layout/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
70
|
-
magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py,sha256=gy7rc8poO-Zr8511NJjuBV8Uryq5k3JKrstLtCONg0c,2237
|
71
|
-
magic_pdf/model/sub_modules/layout/doclayout_yolo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
72
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
73
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/backbone.py,sha256=1cvSCczgvwOLdvzWyqttoYPMHsXmnzI3w9abJ1bAXoM,7106
|
74
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/beit.py,sha256=e-INve6bpEx_0FM5wYbQcEcelc79tzDlCljTVHaGt1w,30450
|
75
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/deit.py,sha256=Qyn5UWutZ-0GJczexCh-oMMSXtav_g3ovumMFJp8Om4,17000
|
76
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/model_init.py,sha256=PhWqqRwgSSmXTaUlLIjGqnBUNjzxwYDKgMzKjnxNy1k,4528
|
77
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/rcnn_vl.py,sha256=nI4G6AeLRmjavNhs5S2USKh0ozn-ftMuW0F0m_eVy3c,6649
|
78
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/visualizer.py,sha256=H6UYeCCbaN2gbDjGthTkKkPoyWxfE3azRjsR7fVBwnw,49797
|
79
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/__init__.py,sha256=C4N9gXJr7is7uznvQefQ7dOhlzEhdp86Lgh-7p0Y-08,186
|
80
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/__init__.py,sha256=W7V62JOh12NdMZj2H1sde3Il0AqW2VKplmHEsLle6tg,76
|
81
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/cord.py,sha256=jR_lRZxy8SeEvTK3FdlXmQHF0kefJf7ZqwM_8pvyI5E,8153
|
82
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/data_collator.py,sha256=M2TE47BprHSuQJYcoMeWOSpqkr_nh8VK6t2l26XWmxg,6279
|
83
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/funsd.py,sha256=Ez9tMeruHncJlkKQ7iRGBB9Pk1uWtgxlGeqs-sOmIG0,5214
|
84
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/image_utils.py,sha256=vuNOMzYw_h7jmaD2XUqkGlrjDEPB7XUts16GRICBmG4,10334
|
85
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/xfund.py,sha256=6jLKyc_4VhbHY4YEzBXm5RkPdsd9ldnUGXFZBLiJ-_s,8270
|
86
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/__init__.py,sha256=d5bm3Rx-jTrgfJDWrzD7t5R5CdHfug9dCNvUEneIYW4,190
|
87
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/__init__.py,sha256=a04w_C0B4P9jF-3I_tXCj3fLmfFQR5XSKGbhgGm--pM,1216
|
88
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/configuration_layoutlmv3.py,sha256=CJBcAmmLeRFVMN1YjWefoUW7hk0KXek0Eb_tergKl4Y,2150
|
89
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/modeling_layoutlmv3.py,sha256=mdo8tO-DrJcv0Lbk9Pp98n3NQXYOnFFyXQWjU7t35kA,54633
|
90
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py,sha256=diKlrfxYjKAmYrUgjYdx-FXLh-swShC3tl-EBX1b3oI,1197
|
91
|
-
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py,sha256=0lxiG69_fGpSSBYA9CBLnDa_qqa1rInZ0pJpqBwZ0Yw,1372
|
92
|
-
magic_pdf/model/sub_modules/mfd/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
93
|
-
magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py,sha256=QfHbMr1br0pOJUu1NJEMgA6yw11G0yFImJv_AfW48_c,1008
|
94
|
-
magic_pdf/model/sub_modules/mfd/yolov8/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
95
|
-
magic_pdf/model/sub_modules/mfr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
96
|
-
magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py,sha256=HPNetRfQeHoHfRTzFEaIjLSHfjrxRvS-EaApMUebZuQ,8020
|
97
|
-
magic_pdf/model/sub_modules/mfr/unimernet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
98
|
-
magic_pdf/model/sub_modules/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
99
|
-
magic_pdf/model/sub_modules/ocr/paddleocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
100
|
-
magic_pdf/model/sub_modules/ocr/paddleocr/ocr_utils.py,sha256=biuLnQWUquZkxmObjpg33iVCPPJKbRA4kx0Uo6OvGyc,12672
|
101
|
-
magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_273_mod.py,sha256=QBBeFN1iF7nj5gqQ5sQXjhpwy8lB4c96gubnRDBuDNU,8424
|
102
|
-
magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_291_mod.py,sha256=VouMTvi6M5TV6pQdlpusgfyZapxiZ_Wi7Ff53eMC3rE,8996
|
103
|
-
magic_pdf/model/sub_modules/reading_oreder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
104
|
-
magic_pdf/model/sub_modules/reading_oreder/layoutreader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
105
|
-
magic_pdf/model/sub_modules/reading_oreder/layoutreader/helpers.py,sha256=IVUFcNMDF3-kio-BIxjppHnWS3eHPqvvNihIw2fbIFM,4372
|
106
|
-
magic_pdf/model/sub_modules/reading_oreder/layoutreader/xycut.py,sha256=ezNSq_Y4UXiztB58hbXJsjTJlOBqWIjuW5A2uLSaZSo,7349
|
107
|
-
magic_pdf/model/sub_modules/table/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
108
|
-
magic_pdf/model/sub_modules/table/table_utils.py,sha256=B9BC4f5EEjlt2ldYxrIC8Wic2Tz3t3gTJeEyK3ggrOU,282
|
109
|
-
magic_pdf/model/sub_modules/table/rapidtable/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
110
|
-
magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py,sha256=6TUO6wiA4oZQB2_VP6kngZF6-2cI6mAP57Qf2lv6LVw,2922
|
111
|
-
magic_pdf/model/sub_modules/table/structeqtable/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
112
|
-
magic_pdf/model/sub_modules/table/structeqtable/struct_eqtable.py,sha256=SrNPm-uOFEvN5muFGbXTAuwzXm-rCiaihVdqbydIBIA,1131
|
113
|
-
magic_pdf/model/sub_modules/table/tablemaster/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
114
|
-
magic_pdf/model/sub_modules/table/tablemaster/tablemaster_paddle.py,sha256=QEQ-56AzoIAU7UWsEidWW_KDOY5r16qm2kSpox8cxq4,2755
|
115
|
-
magic_pdf/operators/__init__.py,sha256=liU2-WYUvsQ1G4PYBppyvokS9z5IjrnlVMtoBAC1REI,2630
|
116
|
-
magic_pdf/operators/models.py,sha256=mRqbCVrxxaUVDpEBAsXaK7EL1M-goICkE1W0FYgewio,5305
|
117
|
-
magic_pdf/operators/pipes.py,sha256=XgBgisKQd_ruW-3Tw4v5LhqloZUHgn2aFcpi_q8LbCs,6767
|
118
|
-
magic_pdf/post_proc/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
119
|
-
magic_pdf/post_proc/llm_aided.py,sha256=yzhu2cCpUZjdwf3v0swYDgSs9VWIfMAoXepYIP1EMZs,6367
|
120
|
-
magic_pdf/post_proc/para_split_v3.py,sha256=SPN_VVGvFX5KpFMGw9OzgoE-kTZq-FF036i0cIImGH8,16975
|
121
|
-
magic_pdf/pre_proc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
122
|
-
magic_pdf/pre_proc/construct_page_dict.py,sha256=OFmq5XRKi6fYIo-lmGlL-NB16Sf0egzsfEx-fT2uYrc,660
|
123
|
-
magic_pdf/pre_proc/cut_image.py,sha256=NDzbxwD7z7Tb4uAxL4KR6LzURFdN1Tzr4nPvj-VmEqc,1225
|
124
|
-
magic_pdf/pre_proc/ocr_detect_all_bboxes.py,sha256=nt88ttXCEI_1ihAF7HU15SQjwM69V-iJmk-L_nyzA6o,9328
|
125
|
-
magic_pdf/pre_proc/ocr_dict_merge.py,sha256=4Z3aHZ9sxzijkVpOCENslvUcpp7DXgNID4Gl3pxwIg4,5512
|
126
|
-
magic_pdf/pre_proc/ocr_span_list_modify.py,sha256=xrgC9vR0poklZuY4Og41pZVdXzuaGFg3BnQ01X60dpo,3102
|
127
|
-
magic_pdf/pre_proc/remove_bbox_overlap.py,sha256=mcdxAh4P56NZ3Ij8h3vW8qC_SrszfXflVWuWUuUiTNg,3089
|
128
|
-
magic_pdf/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
|
129
|
-
magic_pdf/resources/model_config/model_configs.yaml,sha256=v3HwFTmIbXJJEBXUHHHMnZQKRo6ZQtP3cncSebh-5gc,322
|
130
|
-
magic_pdf/resources/model_config/UniMERNet/demo.yaml,sha256=Jdaim2D2lAYrV9rhc1X5Sy2_IacGOrfysJhxEUgSElo,827
|
131
|
-
magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml,sha256=9aNAEYgpHTAWpcUrDvuPG2y4V-Qw8QdcJefi96y8yDU,6109
|
132
|
-
magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt,sha256=dV4lcudF8wimEbAooYbvISvFhrXjp9i0rMRqv9VW6hY,3204667
|
133
|
-
magic_pdf/spark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
134
|
-
magic_pdf/spark/spark_api.py,sha256=BYO6zlRW0cEnIUB3ZzNQTu_LsPHEVitqiUN7gy3x_wo,1124
|
135
|
-
magic_pdf/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
136
|
-
magic_pdf/tools/cli.py,sha256=YiX9LU4UeU3yYIpblGO1cbO95Tbo3A8cmWFK_1WvqfU,4134
|
137
|
-
magic_pdf/tools/cli_dev.py,sha256=3RbubfTIagWoFYdu8wSDanr-BJDjFGeDet55jTy7He0,3948
|
138
|
-
magic_pdf/tools/common.py,sha256=1LfMeXBBsb3WlGeNAze_pPOYXQ8Qbfh-JgRXweojHKo,8381
|
139
|
-
magic_pdf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
140
|
-
magic_pdf/utils/annotations.py,sha256=82ou3uELNbQWa9hOFFkVt0gsIskAKf5msCv5J2IJ5V0,211
|
141
|
-
magic_pdf/utils/office_to_pdf.py,sha256=7aj-Ls2v8saD-Rgu_t3FIc-J3Ka9wnmiEH5zY-H1Vxs,729
|
142
|
-
magic_pdf-1.2.2.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
|
143
|
-
magic_pdf-1.2.2.dist-info/METADATA,sha256=FYzj0yWzmFAG4mQ22DH9F4KZfqexNg7YuhgiXMHc9Ug,41001
|
144
|
-
magic_pdf-1.2.2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
145
|
-
magic_pdf-1.2.2.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
|
146
|
-
magic_pdf-1.2.2.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
|
147
|
-
magic_pdf-1.2.2.dist-info/RECORD,,
|
File without changes
|
/magic_pdf/model/sub_modules/{table/structeqtable → ocr/paddleocr2pytorch/pytorchocr}/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|