mineru 2.5.4__py3-none-any.whl → 2.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mineru/backend/pipeline/model_init.py +25 -3
- mineru/backend/pipeline/model_json_to_middle_json.py +2 -2
- mineru/backend/pipeline/model_list.py +0 -1
- mineru/backend/utils.py +24 -0
- mineru/backend/vlm/model_output_to_middle_json.py +2 -2
- mineru/backend/vlm/{custom_logits_processors.py → utils.py} +36 -2
- mineru/backend/vlm/vlm_analyze.py +43 -50
- mineru/backend/vlm/vlm_magic_model.py +155 -1
- mineru/cli/common.py +25 -22
- mineru/cli/fast_api.py +2 -8
- mineru/cli/gradio_app.py +96 -9
- mineru/cli/models_download.py +1 -0
- mineru/model/mfr/pp_formulanet_plus_m/predict_formula.py +152 -0
- mineru/model/mfr/pp_formulanet_plus_m/processors.py +657 -0
- mineru/model/mfr/unimernet/unimernet_hf/modeling_unimernet.py +1 -326
- mineru/model/mfr/utils.py +338 -0
- mineru/model/ocr/paddleocr2pytorch/pytorch_paddle.py +103 -16
- mineru/model/table/rec/unet_table/main.py +1 -1
- mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/data/imaug/operators.py +5 -5
- mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/__init__.py +2 -1
- mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_lcnetv3.py +7 -7
- mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_pphgnetv2.py +2 -2
- mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/heads/__init__.py +2 -0
- mineru/model/utils/pytorchocr/modeling/heads/rec_ppformulanet_head.py +1383 -0
- mineru/model/utils/pytorchocr/modeling/heads/rec_unimernet_head.py +2631 -0
- mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/postprocess/rec_postprocess.py +25 -28
- mineru/model/utils/pytorchocr/utils/__init__.py +0 -0
- mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/arch_config.yaml +130 -0
- mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_arabic_dict.txt +747 -0
- mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_cyrillic_dict.txt +850 -0
- mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_devanagari_dict.txt +568 -0
- mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_ta_dict.txt +513 -0
- mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_te_dict.txt +540 -0
- mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/models_config.yml +15 -15
- mineru/model/utils/pytorchocr/utils/resources/pp_formulanet_arch_config.yaml +24 -0
- mineru/model/utils/tools/infer/__init__.py +1 -0
- mineru/model/{ocr/paddleocr2pytorch → utils}/tools/infer/predict_det.py +6 -3
- mineru/model/{ocr/paddleocr2pytorch → utils}/tools/infer/predict_rec.py +16 -25
- mineru/model/vlm_vllm_model/server.py +4 -1
- mineru/resources/header.html +2 -2
- mineru/utils/enum_class.py +1 -0
- mineru/utils/llm_aided.py +4 -2
- mineru/utils/ocr_utils.py +16 -0
- mineru/utils/table_merge.py +102 -13
- mineru/version.py +1 -1
- {mineru-2.5.4.dist-info → mineru-2.6.0.dist-info}/METADATA +32 -8
- mineru-2.6.0.dist-info/RECORD +195 -0
- mineru-2.5.4.dist-info/RECORD +0 -181
- /mineru/model/{ocr/paddleocr2pytorch/pytorchocr → mfr/pp_formulanet_plus_m}/__init__.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch/tools/infer → utils}/__init__.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch/pytorchocr/modeling → utils/pytorchocr}/__init__.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/base_ocr_v20.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/data/__init__.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/data/imaug/__init__.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch/pytorchocr/utils → utils/pytorchocr/modeling}/__init__.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/architectures/__init__.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/architectures/base_model.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/det_mobilenet_v3.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_donut_swin.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_hgnet.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_mobilenet_v3.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_mv1_enhance.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_svtrnet.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/common.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/heads/cls_head.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/heads/det_db_head.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/heads/rec_ctc_head.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/heads/rec_multi_head.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/necks/__init__.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/necks/db_fpn.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/necks/intracl.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/necks/rnn.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/postprocess/__init__.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/postprocess/cls_postprocess.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/postprocess/db_postprocess.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/arabic_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/chinese_cht_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/cyrillic_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/devanagari_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/en_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/japan_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ka_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/korean_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/latin_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv4_doc_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_el_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_en_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_eslav_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_korean_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_latin_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ppocrv5_th_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/ta_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/utils/resources/dict/te_dict.txt +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/tools/__init__.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/tools/infer/predict_cls.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/tools/infer/predict_system.py +0 -0
- /mineru/model/{ocr/paddleocr2pytorch → utils}/tools/infer/pytorchocr_utility.py +0 -0
- {mineru-2.5.4.dist-info → mineru-2.6.0.dist-info}/WHEEL +0 -0
- {mineru-2.5.4.dist-info → mineru-2.6.0.dist-info}/entry_points.txt +0 -0
- {mineru-2.5.4.dist-info → mineru-2.6.0.dist-info}/licenses/LICENSE.md +0 -0
- {mineru-2.5.4.dist-info → mineru-2.6.0.dist-info}/top_level.txt +0 -0
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import os
|
|
1
2
|
import sys
|
|
2
3
|
|
|
3
4
|
from mineru.backend.vlm.custom_logits_processors import enable_custom_logits_processors
|
|
@@ -42,7 +43,7 @@ def main():
|
|
|
42
43
|
if not has_port_arg:
|
|
43
44
|
args.extend(["--port", "30000"])
|
|
44
45
|
if not has_gpu_memory_utilization_arg:
|
|
45
|
-
args.extend(["--gpu-memory-utilization", "0.
|
|
46
|
+
args.extend(["--gpu-memory-utilization", "0.7"])
|
|
46
47
|
if not model_path:
|
|
47
48
|
model_path = auto_download_and_get_model_root_path("/", "vlm")
|
|
48
49
|
if (not has_logits_processors_arg) and custom_logits_processors:
|
|
@@ -51,6 +52,8 @@ def main():
|
|
|
51
52
|
# 重构参数,将模型路径作为位置参数
|
|
52
53
|
sys.argv = [sys.argv[0]] + ["serve", model_path] + args
|
|
53
54
|
|
|
55
|
+
os.environ["OMP_NUM_THREADS"] = "1"
|
|
56
|
+
|
|
54
57
|
# 启动vllm服务器
|
|
55
58
|
print(f"start vllm server: {sys.argv}")
|
|
56
59
|
vllm_main()
|
mineru/resources/header.html
CHANGED
|
@@ -99,7 +99,7 @@
|
|
|
99
99
|
</span>
|
|
100
100
|
<!-- arXiv Link. -->
|
|
101
101
|
<span class="link-block">
|
|
102
|
-
<a href="https://arxiv.org/abs/
|
|
102
|
+
<a href="https://arxiv.org/abs/2509.22186" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
|
|
103
103
|
<span class="icon" style="margin-right: 8px">
|
|
104
104
|
<i class="fas fa-file" style="color: white"></i>
|
|
105
105
|
</span>
|
|
@@ -134,4 +134,4 @@
|
|
|
134
134
|
</div>
|
|
135
135
|
|
|
136
136
|
|
|
137
|
-
</body></html>
|
|
137
|
+
</body></html>
|
mineru/utils/enum_class.py
CHANGED
|
@@ -70,6 +70,7 @@ class ModelPath:
|
|
|
70
70
|
doclayout_yolo = "models/Layout/YOLO/doclayout_yolo_docstructbench_imgsz1280_2501.pt"
|
|
71
71
|
yolo_v8_mfd = "models/MFD/YOLO/yolo_v8_ft.pt"
|
|
72
72
|
unimernet_small = "models/MFR/unimernet_hf_small_2503"
|
|
73
|
+
pp_formulanet_plus_m = "models/MFR/pp_formulanet_plus_m"
|
|
73
74
|
pytorch_paddle = "models/OCR/paddleocr_torch"
|
|
74
75
|
layout_reader = "models/ReadingOrder/layout_reader"
|
|
75
76
|
slanet_plus = "models/TabRec/SlanetPlus/slanet-plus.onnx"
|
mineru/utils/llm_aided.py
CHANGED
|
@@ -51,7 +51,7 @@ def llm_aided_title(page_info_list, title_aided_config):
|
|
|
51
51
|
3. 保持字典内key-value的对应关系不变
|
|
52
52
|
|
|
53
53
|
4. 优化层次结构:
|
|
54
|
-
-
|
|
54
|
+
- 根据标题内容的语义为每个标题元素添加适当的层次结构
|
|
55
55
|
- 行高较大的标题一般是更高级别的标题
|
|
56
56
|
- 标题从前至后的层级必须是连续的,不能跳过层级
|
|
57
57
|
- 标题层级最多为4级,不要添加过多的层级
|
|
@@ -61,7 +61,6 @@ def llm_aided_title(page_info_list, title_aided_config):
|
|
|
61
61
|
- 在完成初步分级后,仔细检查分级结果的合理性
|
|
62
62
|
- 根据上下文关系和逻辑顺序,对不合理的分级进行微调
|
|
63
63
|
- 确保最终的分级结果符合文档的实际结构和逻辑
|
|
64
|
-
- 字典中可能包含被误当成标题的正文,你可以通过将其层级标记为 0 来排除它们
|
|
65
64
|
|
|
66
65
|
IMPORTANT:
|
|
67
66
|
请直接返回优化过的由标题层级组成的字典,格式为{{标题id:标题层级}},如下:
|
|
@@ -78,6 +77,8 @@ Input title list:
|
|
|
78
77
|
|
|
79
78
|
Corrected title list:
|
|
80
79
|
"""
|
|
80
|
+
#5.
|
|
81
|
+
#- 字典中可能包含被误当成标题的正文,你可以通过将其层级标记为 0 来排除它们
|
|
81
82
|
|
|
82
83
|
retry_count = 0
|
|
83
84
|
max_retries = 3
|
|
@@ -89,6 +90,7 @@ Corrected title list:
|
|
|
89
90
|
model=title_aided_config["model"],
|
|
90
91
|
messages=[
|
|
91
92
|
{'role': 'user', 'content': title_optimize_prompt}],
|
|
93
|
+
extra_body={"enable_thinking": False},
|
|
92
94
|
temperature=0.7,
|
|
93
95
|
stream=True,
|
|
94
96
|
)
|
mineru/utils/ocr_utils.py
CHANGED
|
@@ -406,6 +406,12 @@ def calculate_is_angle(poly):
|
|
|
406
406
|
# logger.info((p3[1] - p1[1])/height)
|
|
407
407
|
return True
|
|
408
408
|
|
|
409
|
+
def is_bbox_aligned_rect(points):
|
|
410
|
+
x_coords = points[:, 0]
|
|
411
|
+
y_coords = points[:, 1]
|
|
412
|
+
unique_x = np.unique(x_coords)
|
|
413
|
+
unique_y = np.unique(y_coords)
|
|
414
|
+
return len(unique_x) == 2 and len(unique_y) == 2
|
|
409
415
|
|
|
410
416
|
def get_rotate_crop_image(img, points):
|
|
411
417
|
'''
|
|
@@ -419,6 +425,16 @@ def get_rotate_crop_image(img, points):
|
|
|
419
425
|
points[:, 1] = points[:, 1] - top
|
|
420
426
|
'''
|
|
421
427
|
assert len(points) == 4, "shape of points must be 4*2"
|
|
428
|
+
|
|
429
|
+
if is_bbox_aligned_rect(points):
|
|
430
|
+
xmin = int(np.min(points[:, 0]))
|
|
431
|
+
xmax = int(np.max(points[:, 0]))
|
|
432
|
+
ymin = int(np.min(points[:, 1]))
|
|
433
|
+
ymax = int(np.max(points[:, 1]))
|
|
434
|
+
new_img = img[ymin:ymax, xmin:xmax].copy()
|
|
435
|
+
if new_img.shape[0] > 0 and new_img.shape[1] > 0:
|
|
436
|
+
return new_img
|
|
437
|
+
|
|
422
438
|
img_crop_width = int(
|
|
423
439
|
max(
|
|
424
440
|
np.linalg.norm(points[0] - points[1]),
|
mineru/utils/table_merge.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from loguru import logger
|
|
4
4
|
from bs4 import BeautifulSoup
|
|
5
5
|
|
|
6
|
+
from mineru.backend.vlm.vlm_middle_json_mkcontent import merge_para_with_text
|
|
6
7
|
from mineru.utils.enum_class import BlockType, SplitFlag
|
|
7
8
|
|
|
8
9
|
|
|
@@ -144,8 +145,9 @@ def detect_table_headers(soup1, soup2, max_header_rows=5):
|
|
|
144
145
|
colspan2 = int(cell2.get("colspan", 1))
|
|
145
146
|
rowspan2 = int(cell2.get("rowspan", 1))
|
|
146
147
|
|
|
147
|
-
|
|
148
|
-
|
|
148
|
+
# 去除所有空白字符(包括空格、换行、制表符等)
|
|
149
|
+
text1 = ''.join(full_to_half(cell1.get_text()).split())
|
|
150
|
+
text2 = ''.join(full_to_half(cell2.get_text()).split())
|
|
149
151
|
|
|
150
152
|
if colspan1 != colspan2 or rowspan1 != rowspan2 or text1 != text2:
|
|
151
153
|
structure_match = False
|
|
@@ -169,8 +171,12 @@ def detect_table_headers(soup1, soup2, max_header_rows=5):
|
|
|
169
171
|
def can_merge_tables(current_table_block, previous_table_block):
|
|
170
172
|
"""判断两个表格是否可以合并"""
|
|
171
173
|
# 检查表格是否有caption和footnote
|
|
172
|
-
|
|
173
|
-
|
|
174
|
+
# 如果有TABLE_CAPTION类型的块,检查是否至少有一个以"(续)"结尾
|
|
175
|
+
caption_blocks = [block for block in current_table_block["blocks"] if block["type"] == BlockType.TABLE_CAPTION]
|
|
176
|
+
if caption_blocks:
|
|
177
|
+
# 如果所有caption都不以"(续)"结尾,则不合并
|
|
178
|
+
if not any(full_to_half(merge_para_with_text(block).strip()).endswith("(续)") for block in caption_blocks):
|
|
179
|
+
return False, None, None, None, None
|
|
174
180
|
|
|
175
181
|
if any(block["type"] == BlockType.TABLE_FOOTNOTE for block in previous_table_block["blocks"]):
|
|
176
182
|
return False, None, None, None, None
|
|
@@ -253,6 +259,59 @@ def check_rows_match(soup1, soup2):
|
|
|
253
259
|
return last_row_cols == first_row_cols or last_row_visual_cols == first_row_visual_cols
|
|
254
260
|
|
|
255
261
|
|
|
262
|
+
def check_row_columns_match(row1, row2):
|
|
263
|
+
# 逐个cell检测colspan属性是否一致
|
|
264
|
+
cells1 = row1.find_all(["td", "th"])
|
|
265
|
+
cells2 = row2.find_all(["td", "th"])
|
|
266
|
+
if len(cells1) != len(cells2):
|
|
267
|
+
return False
|
|
268
|
+
for cell1, cell2 in zip(cells1, cells2):
|
|
269
|
+
colspan1 = int(cell1.get("colspan", 1))
|
|
270
|
+
colspan2 = int(cell2.get("colspan", 1))
|
|
271
|
+
if colspan1 != colspan2:
|
|
272
|
+
return False
|
|
273
|
+
return True
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def adjust_table_rows_colspan(rows, start_idx, end_idx,
|
|
277
|
+
reference_structure, reference_visual_cols,
|
|
278
|
+
target_cols, current_cols, reference_row):
|
|
279
|
+
"""调整表格行的colspan属性以匹配目标列数
|
|
280
|
+
|
|
281
|
+
Args:
|
|
282
|
+
rows: 表格行列表
|
|
283
|
+
start_idx: 起始行索引
|
|
284
|
+
end_idx: 结束行索引(不包含)
|
|
285
|
+
reference_structure: 参考行的colspan结构列表
|
|
286
|
+
reference_visual_cols: 参考行的视觉列数
|
|
287
|
+
target_cols: 目标总列数
|
|
288
|
+
current_cols: 当前总列数
|
|
289
|
+
reference_row: 参考行对象
|
|
290
|
+
"""
|
|
291
|
+
for i in range(start_idx, end_idx):
|
|
292
|
+
row = rows[i]
|
|
293
|
+
cells = row.find_all(["td", "th"])
|
|
294
|
+
if not cells:
|
|
295
|
+
continue
|
|
296
|
+
|
|
297
|
+
current_row_cols = calculate_row_columns(row)
|
|
298
|
+
if current_row_cols >= target_cols:
|
|
299
|
+
continue
|
|
300
|
+
|
|
301
|
+
# 检查是否与参考行结构匹配
|
|
302
|
+
if calculate_visual_columns(row) == reference_visual_cols and check_row_columns_match(row, reference_row):
|
|
303
|
+
# 尝试应用参考结构
|
|
304
|
+
if len(cells) <= len(reference_structure):
|
|
305
|
+
for j, cell in enumerate(cells):
|
|
306
|
+
if j < len(reference_structure) and reference_structure[j] > 1:
|
|
307
|
+
cell["colspan"] = str(reference_structure[j])
|
|
308
|
+
else:
|
|
309
|
+
# 扩展最后一个单元格以填补列数差异
|
|
310
|
+
last_cell = cells[-1]
|
|
311
|
+
current_last_span = int(last_cell.get("colspan", 1))
|
|
312
|
+
last_cell["colspan"] = str(current_last_span + (target_cols - current_cols))
|
|
313
|
+
|
|
314
|
+
|
|
256
315
|
def perform_table_merge(soup1, soup2, previous_table_block, wait_merge_table_footnotes):
|
|
257
316
|
"""执行表格合并操作"""
|
|
258
317
|
# 检测表头有几行,并确认表头内容是否一致
|
|
@@ -263,17 +322,47 @@ def perform_table_merge(soup1, soup2, previous_table_block, wait_merge_table_foo
|
|
|
263
322
|
# 找到第一个表格的tbody,如果没有则查找table元素
|
|
264
323
|
tbody1 = soup1.find("tbody") or soup1.find("table")
|
|
265
324
|
|
|
266
|
-
#
|
|
267
|
-
|
|
325
|
+
# 获取表1和表2的所有行
|
|
326
|
+
rows1 = soup1.find_all("tr")
|
|
327
|
+
rows2 = soup2.find_all("tr")
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
if rows1 and rows2 and header_count < len(rows2):
|
|
331
|
+
# 获取表1最后一行和表2第一个非表头行
|
|
332
|
+
last_row1 = rows1[-1]
|
|
333
|
+
first_data_row2 = rows2[header_count]
|
|
334
|
+
|
|
335
|
+
# 计算表格总列数
|
|
336
|
+
table_cols1 = calculate_table_total_columns(soup1)
|
|
337
|
+
table_cols2 = calculate_table_total_columns(soup2)
|
|
338
|
+
if table_cols1 >= table_cols2:
|
|
339
|
+
reference_structure = [int(cell.get("colspan", 1)) for cell in last_row1.find_all(["td", "th"])]
|
|
340
|
+
reference_visual_cols = calculate_visual_columns(last_row1)
|
|
341
|
+
# 以表1的最后一行为参考,调整表2的行
|
|
342
|
+
adjust_table_rows_colspan(
|
|
343
|
+
rows2, header_count, len(rows2),
|
|
344
|
+
reference_structure, reference_visual_cols,
|
|
345
|
+
table_cols1, table_cols2, first_data_row2
|
|
346
|
+
)
|
|
347
|
+
|
|
348
|
+
else: # table_cols2 > table_cols1
|
|
349
|
+
reference_structure = [int(cell.get("colspan", 1)) for cell in first_data_row2.find_all(["td", "th"])]
|
|
350
|
+
reference_visual_cols = calculate_visual_columns(first_data_row2)
|
|
351
|
+
# 以表2的第一个数据行为参考,调整表1的行
|
|
352
|
+
adjust_table_rows_colspan(
|
|
353
|
+
rows1, 0, len(rows1),
|
|
354
|
+
reference_structure, reference_visual_cols,
|
|
355
|
+
table_cols2, table_cols1, last_row1
|
|
356
|
+
)
|
|
268
357
|
|
|
269
358
|
# 将第二个表格的行添加到第一个表格中
|
|
270
|
-
if tbody1
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
359
|
+
if tbody1:
|
|
360
|
+
tbody2 = soup2.find("tbody") or soup2.find("table")
|
|
361
|
+
if tbody2:
|
|
362
|
+
# 将第二个表格的行添加到第一个表格中(跳过表头行)
|
|
363
|
+
for row in rows2[header_count:]:
|
|
364
|
+
row.extract()
|
|
365
|
+
tbody1.append(row)
|
|
277
366
|
|
|
278
367
|
# 添加待合并表格的footnote到前一个表格中
|
|
279
368
|
for table_footnote in wait_merge_table_footnotes:
|
mineru/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "2.5.4"
|
|
1
|
+
__version__ = "2.6.0"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mineru
|
|
3
|
-
Version: 2.5.4
|
|
3
|
+
Version: 2.6.0
|
|
4
4
|
Summary: A practical tool for converting PDF to Markdown
|
|
5
5
|
License: AGPL-3.0
|
|
6
6
|
Project-URL: homepage, https://mineru.net/
|
|
@@ -34,10 +34,10 @@ Requires-Dist: json-repair>=0.46.2
|
|
|
34
34
|
Requires-Dist: opencv-python>=4.11.0.86
|
|
35
35
|
Requires-Dist: fast-langdetect<0.3.0,>=0.2.3
|
|
36
36
|
Requires-Dist: scikit-image<1.0.0,>=0.25.0
|
|
37
|
-
Requires-Dist: openai<
|
|
37
|
+
Requires-Dist: openai<3,>=1.70.0
|
|
38
38
|
Requires-Dist: beautifulsoup4<5,>=4.13.5
|
|
39
39
|
Requires-Dist: magika<0.7.0,>=0.6.2
|
|
40
|
-
Requires-Dist: mineru-vl-utils<1,>=0.1.
|
|
40
|
+
Requires-Dist: mineru-vl-utils<1,>=0.1.14
|
|
41
41
|
Provides-Extra: test
|
|
42
42
|
Requires-Dist: mineru[core]; extra == "test"
|
|
43
43
|
Requires-Dist: pytest; extra == "test"
|
|
@@ -49,7 +49,7 @@ Requires-Dist: torch<3,>=2.6.0; extra == "vlm"
|
|
|
49
49
|
Requires-Dist: transformers<5.0.0,>=4.51.1; extra == "vlm"
|
|
50
50
|
Requires-Dist: accelerate>=1.5.1; extra == "vlm"
|
|
51
51
|
Provides-Extra: vllm
|
|
52
|
-
Requires-Dist: vllm<0.
|
|
52
|
+
Requires-Dist: vllm<0.12,>=0.10.1.1; extra == "vllm"
|
|
53
53
|
Provides-Extra: pipeline
|
|
54
54
|
Requires-Dist: matplotlib<4,>=3.10; extra == "pipeline"
|
|
55
55
|
Requires-Dist: ultralytics<9,>=8.3.48; extra == "pipeline"
|
|
@@ -84,7 +84,7 @@ Dynamic: license-file
|
|
|
84
84
|
<div align="center" xmlns="http://www.w3.org/1999/html">
|
|
85
85
|
<!-- logo -->
|
|
86
86
|
<p align="center">
|
|
87
|
-
<img src="docs/images/MinerU-logo.png" width="300px" style="vertical-align:middle;">
|
|
87
|
+
<img src="https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docs/images/MinerU-logo.png" width="300px" style="vertical-align:middle;">
|
|
88
88
|
</p>
|
|
89
89
|
|
|
90
90
|
<!-- icon -->
|
|
@@ -101,7 +101,8 @@ Dynamic: license-file
|
|
|
101
101
|
[HuggingFace Space](https://huggingface.co/spaces/opendatalab/MinerU)
|
|
102
102
|
[ModelScope Studio](https://www.modelscope.cn/studios/OpenDataLab/MinerU)
|
|
103
103
|
[Colab Demo](https://colab.research.google.com/gist/myhloli/a3cb16570ab3cfeadf9d8f0ac91b4fca/mineru_demo.ipynb)
|
|
104
|
-
[arXiv:2409.18839](https://arxiv.org/abs/2409.18839)
|
|
105
|
+
[arXiv:2509.22186](https://arxiv.org/abs/2509.22186)
|
|
105
106
|
[DeepWiki](https://deepwiki.com/opendatalab/MinerU)
|
|
106
107
|
|
|
107
108
|
|
|
@@ -126,8 +127,21 @@ Dynamic: license-file
|
|
|
126
127
|
</div>
|
|
127
128
|
|
|
128
129
|
# Changelog
|
|
129
|
-
|
|
130
|
-
-
|
|
130
|
+
- 2025/10/24 2.6.0 Release
|
|
131
|
+
- `pipeline` backend optimizations
|
|
132
|
+
- Added experimental support for Chinese formulas, which can be enabled by setting the environment variable `export MINERU_FORMULA_CH_SUPPORT=1`. This feature may cause a slight decrease in MFR speed and failures in recognizing some long formulas. It is recommended to enable it only when parsing Chinese formulas is needed. To disable this feature, set the environment variable to `0`.
|
|
133
|
+
- `OCR` speed significantly improved by 200%~300%, thanks to the optimization solution provided by [@cjsdurj](https://github.com/cjsdurj)
|
|
134
|
+
- `OCR` models updated to `ppocr-v5` version for Cyrillic, Arabic, Devanagari, Telugu (te), and Tamil (ta) languages, with accuracy improved by over 40% compared to previous models
|
|
135
|
+
- `vlm` backend optimizations
|
|
136
|
+
- `table_caption` and `table_footnote` matching logic optimized to improve the accuracy of table caption and footnote matching and reading order rationality in scenarios with multiple consecutive tables on a page
|
|
137
|
+
- Optimized CPU resource usage during high concurrency when using `vllm` backend, reducing server pressure
|
|
138
|
+
- Adapted to `vllm` version 0.11.0
|
|
139
|
+
- General optimizations
|
|
140
|
+
- Cross-page table merging effect optimized, added support for cross-page continuation table merging, improving table merging effectiveness in multi-column merge scenarios
|
|
141
|
+
- Added environment variable configuration option `MINERU_TABLE_MERGE_ENABLE` for table merging feature. Table merging is enabled by default and can be disabled by setting this variable to `0`
|
|
142
|
+
|
|
143
|
+
- 2025/09/26 2.5.4 released
|
|
144
|
+
- 🎉🎉 The MinerU2.5 [Technical Report](https://arxiv.org/abs/2509.22186) is now available! We welcome you to read it for a comprehensive overview of its model architecture, training strategy, data engineering and evaluation results.
|
|
131
145
|
- Fixed an issue where some `PDF` files were mistakenly identified as `AI` files, causing parsing failures
|
|
132
146
|
|
|
133
147
|
- 2025/09/20 2.5.3 Released
|
|
@@ -825,6 +839,16 @@ Currently, some models in this project are trained based on YOLO. However, since
|
|
|
825
839
|
# Citation
|
|
826
840
|
|
|
827
841
|
```bibtex
|
|
842
|
+
@misc{niu2025mineru25decoupledvisionlanguagemodel,
|
|
843
|
+
title={MinerU2.5: A Decoupled Vision-Language Model for Efficient High-Resolution Document Parsing},
|
|
844
|
+
author={Junbo Niu and Zheng Liu and Zhuangcheng Gu and Bin Wang and Linke Ouyang and Zhiyuan Zhao and Tao Chu and Tianyao He and Fan Wu and Qintong Zhang and Zhenjiang Jin and Guang Liang and Rui Zhang and Wenzheng Zhang and Yuan Qu and Zhifei Ren and Yuefeng Sun and Yuanhong Zheng and Dongsheng Ma and Zirui Tang and Boyu Niu and Ziyang Miao and Hejun Dong and Siyi Qian and Junyuan Zhang and Jingzhou Chen and Fangdong Wang and Xiaomeng Zhao and Liqun Wei and Wei Li and Shasha Wang and Ruiliang Xu and Yuanyuan Cao and Lu Chen and Qianqian Wu and Huaiyu Gu and Lindong Lu and Keming Wang and Dechen Lin and Guanlin Shen and Xuanhe Zhou and Linfeng Zhang and Yuhang Zang and Xiaoyi Dong and Jiaqi Wang and Bo Zhang and Lei Bai and Pei Chu and Weijia Li and Jiang Wu and Lijun Wu and Zhenxiang Li and Guangyu Wang and Zhongying Tu and Chao Xu and Kai Chen and Yu Qiao and Bowen Zhou and Dahua Lin and Wentao Zhang and Conghui He},
|
|
845
|
+
year={2025},
|
|
846
|
+
eprint={2509.22186},
|
|
847
|
+
archivePrefix={arXiv},
|
|
848
|
+
primaryClass={cs.CV},
|
|
849
|
+
url={https://arxiv.org/abs/2509.22186},
|
|
850
|
+
}
|
|
851
|
+
|
|
828
852
|
@misc{wang2024mineruopensourcesolutionprecise,
|
|
829
853
|
title={MinerU: An Open-Source Solution for Precise Document Content Extraction},
|
|
830
854
|
author={Bin Wang and Chao Xu and Xiaomeng Zhao and Linke Ouyang and Fan Wu and Zhiyuan Zhao and Rui Xu and Kaiwen Liu and Yuan Qu and Fukai Shang and Bo Zhang and Liqun Wei and Zhihao Sui and Wei Li and Botian Shi and Yu Qiao and Dahua Lin and Conghui He},
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
mineru/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
2
|
+
mineru/version.py,sha256=OEib63e0yPEGlhEXyrWE1OwRnleR0cHI7KSX7oZEQLs,22
|
|
3
|
+
mineru/backend/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
4
|
+
mineru/backend/utils.py,sha256=GLJU3IznDmhE1_qNmkU1UOtsuskIHBezgsEVO6Uar-Y,698
|
|
5
|
+
mineru/backend/pipeline/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
6
|
+
mineru/backend/pipeline/batch_analyze.py,sha256=dOnktvOMjfg84w1H34YlJg6N9_x6Yfvf14NIpOQcZqQ,22221
|
|
7
|
+
mineru/backend/pipeline/model_init.py,sha256=OfB2MMjNmZcHl4fkqS1fT5R8I3LVoSKAHGtl8PcBfBs,9372
|
|
8
|
+
mineru/backend/pipeline/model_json_to_middle_json.py,sha256=DtB7kE_7CtxwOMcb6QYeKzY6vMwUJNpavc5fn9z9oiI,10916
|
|
9
|
+
mineru/backend/pipeline/model_list.py,sha256=7cXMBfZrP0K6qWueg1D_-WoUANeSINzkn_ic9E7YQLs,222
|
|
10
|
+
mineru/backend/pipeline/para_split.py,sha256=Kq95MmvkPm7rKxlCSGiTvVKyF7CErHI2eGGAs5sLl0Q,17119
|
|
11
|
+
mineru/backend/pipeline/pipeline_analyze.py,sha256=rbO5AetOdnxR5ctkoDzFCFoElkz7Jgb7gi2Ct596NK8,6655
|
|
12
|
+
mineru/backend/pipeline/pipeline_magic_model.py,sha256=w8jGx8f6yZN0Wf2yPP3L9rYKc9rogxreZCrUJzJvPO8,14974
|
|
13
|
+
mineru/backend/pipeline/pipeline_middle_json_mkcontent.py,sha256=xWWOFmYL6hB8PLrxQFyRJ72dAmTIDHtqiWV-WFUfR44,14081
|
|
14
|
+
mineru/backend/vlm/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
15
|
+
mineru/backend/vlm/model_output_to_middle_json.py,sha256=AqYX44gS9crUO_t7SuUatD71EVjow6pI6yA2Ik3gQ0s,5139
|
|
16
|
+
mineru/backend/vlm/utils.py,sha256=6NmVmr6-7idurCmT-1gE2SdmGaorSGgIaHmAg0fMABI,2792
|
|
17
|
+
mineru/backend/vlm/vlm_analyze.py,sha256=aepYsICM2LXhm4pkAa0Abyki1d8M-OdbgeL4KWt91BQ,8083
|
|
18
|
+
mineru/backend/vlm/vlm_magic_model.py,sha256=Pd0sOr7G1crAJIVeq6h_03gNSuxmV5U8dvGTGT_rrjs,23452
|
|
19
|
+
mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=Ie95XpwTgi7EmidcwE_scvXMRQjE2xASU_Rm_F8EP-I,13377
|
|
20
|
+
mineru/cli/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
21
|
+
mineru/cli/client.py,sha256=uo7db9Wqj1Mc11MYuaM-bi54BfKKU3SFB9Urc8md5X4,6641
|
|
22
|
+
mineru/cli/common.py,sha256=jxFJMdc-02UMO3SXAtcZ6aIdPrakAE6DCccZ9kDlPKc,14276
|
|
23
|
+
mineru/cli/fast_api.py,sha256=t5bda769VbM5iokAboiJfPIOnm-r5GTFReE-KQy8L3g,10941
|
|
24
|
+
mineru/cli/gradio_app.py,sha256=8rMdW7grwBUn0MdXyG4eOTQUzKWq6nErtMWl-vGdWbU,14525
|
|
25
|
+
mineru/cli/models_download.py,sha256=7KA-Boe-eIt3WW6eyaxM1HfubTXLsQ8sMmT1H1X7vAc,4815
|
|
26
|
+
mineru/cli/vlm_vllm_server.py,sha256=fQJyD-gIPQ41hR_6aIaDJczl66N310t0CiZEBAfX5mc,90
|
|
27
|
+
mineru/data/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
28
|
+
mineru/data/data_reader_writer/__init__.py,sha256=9qnGNrsuGBMwwfsQy6oChdkz--a_LPdYWE0VZZr0yr4,490
|
|
29
|
+
mineru/data/data_reader_writer/base.py,sha256=nqmAcdHOXMOJO6RAT3ILligDFaw8Op0STyCw5yOzAbI,1706
|
|
30
|
+
mineru/data/data_reader_writer/dummy.py,sha256=MSxQaZOK8i-llkPXDn08kvvuIte5oJB_4lRjr1mnXtA,315
|
|
31
|
+
mineru/data/data_reader_writer/filebase.py,sha256=glXVSJJ-uA__qD0J_rdhHU2VYEOGaiC2gk9SJwynPj4,2113
|
|
32
|
+
mineru/data/data_reader_writer/multi_bucket_s3.py,sha256=bwVIimVVaj0MNeVJpNAArW41dGyqqvCzq9JZ6Ohdspg,5828
|
|
33
|
+
mineru/data/data_reader_writer/s3.py,sha256=Nwf8icHVQqm8RI2n4AzzVuwK75d1q5JZVuDpe74ChHg,2361
|
|
34
|
+
mineru/data/io/__init__.py,sha256=mjQ_LASaX-4_pg-1uzaMJysIElglUs3o-akqrAP8MCQ,201
|
|
35
|
+
mineru/data/io/base.py,sha256=SqNQqe30ZvoVvg7GVv-hLMCjN6yBgDyQQWeLgGsTfhQ,1118
|
|
36
|
+
mineru/data/io/http.py,sha256=d2-CZBGjMPOvdAkkC9zxUEKYYiZPgL76ZqcG4hIDeT0,941
|
|
37
|
+
mineru/data/io/s3.py,sha256=3fDitN7rEGn1DKDkjKtf2yC68mDrJ-tVyyi8VYkNYeA,3593
|
|
38
|
+
mineru/data/utils/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
39
|
+
mineru/data/utils/exceptions.py,sha256=oxFpUQVum8LRFAgg1cZvMoN4xgSUe95rgNDU2mzTlwc,834
|
|
40
|
+
mineru/data/utils/path_utils.py,sha256=ykeo-WW163I2GKAWo0vIpP1MrtwI99PPqtCC05uhvVM,1093
|
|
41
|
+
mineru/data/utils/schemas.py,sha256=MK_pnWkK69MRnVaykni2tCRy6sx7cdCePry_W7UUghc,714
|
|
42
|
+
mineru/model/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
43
|
+
mineru/model/layout/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
44
|
+
mineru/model/layout/doclayoutyolo.py,sha256=DttINdulzTiYcVDl_70oDtUdfVmGc9qkKWmbPOGAeV0,3867
|
|
45
|
+
mineru/model/mfd/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
46
|
+
mineru/model/mfd/yolo_v8.py,sha256=3zrxPQWgrSdq13CqcL9dNtZ8oJPLjQzH10hptNA1iLA,3492
|
|
47
|
+
mineru/model/mfr/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
48
|
+
mineru/model/mfr/utils.py,sha256=pAi1HnkTuO0R6251Hdl-o50m0wH0Ce89PAf74WCsXPU,11499
|
|
49
|
+
mineru/model/mfr/pp_formulanet_plus_m/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
50
|
+
mineru/model/mfr/pp_formulanet_plus_m/predict_formula.py,sha256=alGX_sPJxZh_7v1sOK3DJ8akfkWO-2c5I_JR7aXMTLU,5588
|
|
51
|
+
mineru/model/mfr/pp_formulanet_plus_m/processors.py,sha256=MSKyanxiDDjgDQHBov-GjKtPnMx9tSmxBC9GIkM3ft8,23832
|
|
52
|
+
mineru/model/mfr/unimernet/Unimernet.py,sha256=MrW6F084EHBmD-IbbtKbllrZ6MSH65otMJfrhBuRweg,5589
|
|
53
|
+
mineru/model/mfr/unimernet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
54
|
+
mineru/model/mfr/unimernet/unimernet_hf/__init__.py,sha256=kHcISG8GS4TWJW34SCJCei1jxo6HxvO00aC0dqyNFgI,413
|
|
55
|
+
mineru/model/mfr/unimernet/unimernet_hf/modeling_unimernet.py,sha256=_lN3zDKxeqsW-h9tXx79DYiT5uT4P9ixG49WrSYKFxE,7551
|
|
56
|
+
mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py,sha256=9T2rBpyGX5YFQYj89-mWujRokOuz4xgNreBuegcg1_c,228
|
|
57
|
+
mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py,sha256=hwRar3pqN_cVs3TRTNSuhB4wacBncfJ-qvaTajRb0xc,7934
|
|
58
|
+
mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py,sha256=XknL6UD2shfcErAD8kLk51Ty3Ltbv7uDi_Y3kxG1je8,114098
|
|
59
|
+
mineru/model/mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
60
|
+
mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py,sha256=8_1DKwDCDUBkeHYiJJ6MZnodZBsatHbqhygh11s9eEA,267
|
|
61
|
+
mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py,sha256=OX3eRUKBnKCXtxJOG3sdNoB1IV-Z7efgWU-gaclYOGA,5780
|
|
62
|
+
mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py,sha256=a9kCvwzJJSRrKQNtW2oOpTwrapzep8BjGFWLhLF1T0k,6036
|
|
63
|
+
mineru/model/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py,sha256=Q_fdmFHUBtEoAfWp9aowdwTCE2MIFMOPbYjoSyXK2iU,48929
|
|
64
|
+
mineru/model/ocr/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
65
|
+
mineru/model/ocr/paddleocr2pytorch/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
66
|
+
mineru/model/ocr/paddleocr2pytorch/pytorch_paddle.py,sha256=wZOw82q1NARNHBW2Lk5zumjdAqzPZqnhV6rvMULvLs8,9207
|
|
67
|
+
mineru/model/ori_cls/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
68
|
+
mineru/model/ori_cls/paddle_ori_cls.py,sha256=VIS22IerHST7g60AC9r2PEQIG6NQWeQaH1OrXIxNTsg,11943
|
|
69
|
+
mineru/model/reading_order/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
70
|
+
mineru/model/reading_order/layout_reader.py,sha256=IVUFcNMDF3-kio-BIxjppHnWS3eHPqvvNihIw2fbIFM,4372
|
|
71
|
+
mineru/model/reading_order/xycut.py,sha256=ezNSq_Y4UXiztB58hbXJsjTJlOBqWIjuW5A2uLSaZSo,7349
|
|
72
|
+
mineru/model/table/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
73
|
+
mineru/model/table/cls/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
74
|
+
mineru/model/table/cls/paddle_table_cls.py,sha256=5PtieKQnAzgMNRTZFgnqQsGWKTEQ3yyFWQnBRIjfQ4A,5781
|
|
75
|
+
mineru/model/table/rec/RapidTable.py,sha256=FxO3dLNKfQrgcQU7gRI0kLAxllnoHWZptCtyyHNuMpM,5973
|
|
76
|
+
mineru/model/table/rec/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
77
|
+
mineru/model/table/rec/slanet_plus/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
78
|
+
mineru/model/table/rec/slanet_plus/main.py,sha256=vfrcvQ9JBf32YZU9eNoetoqdpcrFNsA1WNqQBsG8i2o,7646
|
|
79
|
+
mineru/model/table/rec/slanet_plus/matcher.py,sha256=uwF-wCLaYlaQ3JQ_-YywGVl1XQYnx7G_RTuWLW8JlBk,7321
|
|
80
|
+
mineru/model/table/rec/slanet_plus/matcher_utils.py,sha256=9wt_ydeeViLd57bU6g3lnXXni49qLSra2C6wSFQZkiw,9597
|
|
81
|
+
mineru/model/table/rec/slanet_plus/table_structure.py,sha256=Ve9eUdA0ivHf5bf9gwvHHfb7-E7drJLP3S3MPlh3uZ0,3844
|
|
82
|
+
mineru/model/table/rec/slanet_plus/table_structure_utils.py,sha256=YYSkwN2WdLx7qkWMSGkPY7yXOH5ENVhg5CsRGhtZ5Wk,19281
|
|
83
|
+
mineru/model/table/rec/unet_table/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
84
|
+
mineru/model/table/rec/unet_table/main.py,sha256=J13Q7_6stYyedmVedf9CZD7R0tuguGfTg3Z3ob4GDuM,15565
|
|
85
|
+
mineru/model/table/rec/unet_table/table_recover.py,sha256=rSyeWyuP10M8dLKA5e0n4P2DXMYbVbmgLxEcdZA8_0E,9059
|
|
86
|
+
mineru/model/table/rec/unet_table/table_structure_unet.py,sha256=beBMmBHAOR2lAuf2rcOKRSbFaJqwuIgMJWxWQsFmIRI,7908
|
|
87
|
+
mineru/model/table/rec/unet_table/utils.py,sha256=CYAqJW0wePJk4NAemb8W203N7E32v0ujiWbxanDhd8I,16083
|
|
88
|
+
mineru/model/table/rec/unet_table/utils_table_line_rec.py,sha256=zrCdPwI4M8nu0FEfd7lRJAe0z8kYq3KFbzwElM82USE,11174
|
|
89
|
+
mineru/model/table/rec/unet_table/utils_table_recover.py,sha256=XksJsY82ZS0kqUnNT-jvaYzxJ3V3svMSzj0puwIau1k,10651
|
|
90
|
+
mineru/model/utils/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
91
|
+
mineru/model/utils/pytorchocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
92
|
+
mineru/model/utils/pytorchocr/base_ocr_v20.py,sha256=5bI7MAu65r-vn28krwdJ6pjZMkEvWjspE7EQaTsRERw,1319
|
|
93
|
+
mineru/model/utils/pytorchocr/data/__init__.py,sha256=YYu3c-W4fgEErxxDM98uQ3oWwPEh-6w75LY4zcj4VtM,199
|
|
94
|
+
mineru/model/utils/pytorchocr/data/imaug/__init__.py,sha256=c4H0gXPRweQ0wMFnkrCLTR6MrtG-e4kUinxwq2G1V9U,1480
|
|
95
|
+
mineru/model/utils/pytorchocr/data/imaug/operators.py,sha256=edBaDeezmRAkGkduPF6IWcUpE2WXRh7mARqSnwyynEA,14146
|
|
96
|
+
mineru/model/utils/pytorchocr/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
97
|
+
mineru/model/utils/pytorchocr/modeling/common.py,sha256=3r2jTvPYQS4IgTvIqR4l6bBVwR7jn-87rSmpv3tlqxI,2294
|
|
98
|
+
mineru/model/utils/pytorchocr/modeling/architectures/__init__.py,sha256=DCA9FS4mE5oCHDlBhUrkYLdxFeQIbhPj4P8oJ_gRZD8,832
|
|
99
|
+
mineru/model/utils/pytorchocr/modeling/architectures/base_model.py,sha256=RhV2Dm-os08kCFylT57zRu72Hq_RJdFy3xQe1MPaCuU,3588
|
|
100
|
+
mineru/model/utils/pytorchocr/modeling/backbones/__init__.py,sha256=dOmqhvLrBM-2imcwt73kS6APmMbhoVYTozlmqOkRfqA,2168
|
|
101
|
+
mineru/model/utils/pytorchocr/modeling/backbones/det_mobilenet_v3.py,sha256=r0gWnA1Xmt0Zw4FQLx7kf-WWwZd_26PfNzhM05drcuE,8334
|
|
102
|
+
mineru/model/utils/pytorchocr/modeling/backbones/rec_donut_swin.py,sha256=lTCje7mPuE-fHe05ATJe5C77wxk0CRTolAtD_gfQTmg,46257
|
|
103
|
+
mineru/model/utils/pytorchocr/modeling/backbones/rec_hgnet.py,sha256=UsIbzqN_koyGoSh1TA9r27SggpHbeKS3HmmS-A2Aw04,8341
|
|
104
|
+
mineru/model/utils/pytorchocr/modeling/backbones/rec_lcnetv3.py,sha256=-VRzTPtr7LTmwJ4dCtsI--gD81YAdh3llVau9K4Vnc8,16032
|
|
105
|
+
mineru/model/utils/pytorchocr/modeling/backbones/rec_mobilenet_v3.py,sha256=mJmE6xGpjHZH2Vaw16LlIlqRFFm9R9yRsSJEa3Yn3nw,4822
|
|
106
|
+
mineru/model/utils/pytorchocr/modeling/backbones/rec_mv1_enhance.py,sha256=K4p9KFYNmltV3y3QsxHIASNxoqlGtxgAoCxeFofyCmw,6726
|
|
107
|
+
mineru/model/utils/pytorchocr/modeling/backbones/rec_pphgnetv2.py,sha256=Yfp1xR5Shs5b1uxGjhFSrYgPq9Bl3NogYSo-KOJu08k,57119
|
|
108
|
+
mineru/model/utils/pytorchocr/modeling/backbones/rec_svtrnet.py,sha256=AIaUZ3IWBkRz2pWmanBjS0QdJcYnimMSV4MWofNpQcg,20222
|
|
109
|
+
mineru/model/utils/pytorchocr/modeling/heads/__init__.py,sha256=dlDWAICD_3PrYihipCHDP5GCJVH_-fwSj7WfojfICMo,1368
|
|
110
|
+
mineru/model/utils/pytorchocr/modeling/heads/cls_head.py,sha256=puIy5GlUtAKer6eS4HWKu07PzRd-HlDAqIz5WqjBHaA,596
|
|
111
|
+
mineru/model/utils/pytorchocr/modeling/heads/det_db_head.py,sha256=-k8bpuGQw_xIVDsumrfimOxg0O-oP2MOAyDJTjU70Ro,3633
|
|
112
|
+
mineru/model/utils/pytorchocr/modeling/heads/rec_ctc_head.py,sha256=ywyk5RJgUITdXvrUZk2yBSWKsaZIqnTofdFbuQUtwjU,1311
|
|
113
|
+
mineru/model/utils/pytorchocr/modeling/heads/rec_multi_head.py,sha256=K40SMA8tAVWu-3fwgfh3jGWeVFAdVnMyHjeZeI9OO7Q,2016
|
|
114
|
+
mineru/model/utils/pytorchocr/modeling/heads/rec_ppformulanet_head.py,sha256=5cLJUasDKrYCC47zTx2D5Osl6CnPh6JAmdwb6saeDWg,53991
|
|
115
|
+
mineru/model/utils/pytorchocr/modeling/heads/rec_unimernet_head.py,sha256=wsHxZEX2VO6kNELR43eUMleWJXrDUgcP5nLWfNIrM-E,95763
|
|
116
|
+
mineru/model/utils/pytorchocr/modeling/necks/__init__.py,sha256=634L1y-QWv5P8opNiSmKvQEx3Uskc20RG8DYiCdbl8U,1030
|
|
117
|
+
mineru/model/utils/pytorchocr/modeling/necks/db_fpn.py,sha256=TLF2pSyvRC0oPzL0eVyNlg3W6Zvfr4J8fD1nziVB7uI,14146
|
|
118
|
+
mineru/model/utils/pytorchocr/modeling/necks/intracl.py,sha256=w2QdwdI9BpiW92VS4mqL31sVERIbY53TfbD5Q6okiaY,3410
|
|
119
|
+
mineru/model/utils/pytorchocr/modeling/necks/rnn.py,sha256=TAUq4me4g_yXxat5wFOgGTqnqC2UUK1FZ2Le-2EWKqA,7503
|
|
120
|
+
mineru/model/utils/pytorchocr/postprocess/__init__.py,sha256=iC1Ol6CTxRWZBUyQ_5IVMR6kIurv9WJPOWWo7NAuZBA,1183
|
|
121
|
+
mineru/model/utils/pytorchocr/postprocess/cls_postprocess.py,sha256=1VVWXT_b1vhGb7PGvqyfUQ3Ip7LupH62vPva98GtjTA,685
|
|
122
|
+
mineru/model/utils/pytorchocr/postprocess/db_postprocess.py,sha256=AdZPF7frhQ27VVdp0GFmMcXtivwDZZfXYhzJOlP4zUs,6483
|
|
123
|
+
mineru/model/utils/pytorchocr/postprocess/rec_postprocess.py,sha256=qGB3onFEFhHjqksIR1IKOx2EY98ewfsmjADjrRXg30Y,30552
|
|
124
|
+
mineru/model/utils/pytorchocr/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
125
|
+
mineru/model/utils/pytorchocr/utils/resources/arch_config.yaml,sha256=yl4qTf-q0Du0MEOuYDffOt776_6qXBU5b2K3N-IOjd8,14964
|
|
126
|
+
mineru/model/utils/pytorchocr/utils/resources/models_config.yml,sha256=70B392J5XloC7mnK1eVi8GsWKSu7UE7qGffkEmBI9Us,2278
|
|
127
|
+
mineru/model/utils/pytorchocr/utils/resources/pp_formulanet_arch_config.yaml,sha256=a7yueOTUrfpZo8CsK6vQokbLNB2J-P77ihaCh_LozvQ,507
|
|
128
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/arabic_dict.txt,sha256=xbaXD14RWk0Vpc7fAHpephuszp1j-Qi3IWC4VrFKu70,407
|
|
129
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/chinese_cht_dict.txt,sha256=gyVR_uHy-8l1CHctgevcjboSwA3pejXHHJ3fQ92sGoM,33443
|
|
130
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/cyrillic_dict.txt,sha256=NpqCxsjEeXhKXXJkSLg7Hq-1_vCkEppeqjkpYl3c0TI,410
|
|
131
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/devanagari_dict.txt,sha256=tfG-bYu_8aGfuWxdTKlqQjOAI0u30s4OB7WDittNGOo,508
|
|
132
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/en_dict.txt,sha256=VmLfnS0D8OjKDTsGSdasurkEtqFLPTUhRjxxw3xmjOM,190
|
|
133
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/japan_dict.txt,sha256=Hc_LQe7JBXapRbMITyKt4RztUG4k8Uh5JFsHFpjzCOg,17332
|
|
134
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/ka_dict.txt,sha256=-tP3ZZQyde7CE0pvvJtSeFQmZBEE1OfbOhWdxz80Hd4,452
|
|
135
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/korean_dict.txt,sha256=qh_ciuj3zUCg7E7bRy6wQh4RQn5sz-6ZFUQHQsGLCiA,14480
|
|
136
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/latin_dict.txt,sha256=jm1ONil4jDXDH35TAofWFHtUm7eiZb1nCLsoETRCniw,468
|
|
137
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt,sha256=KLI2KtSrLcOHaapy_rU146nds_0qdYWgWSDmOTsdx_c,26249
|
|
138
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv4_doc_dict.txt,sha256=pbw4h8Q8kB5aP5exP_rfHFdU7efMjJ9aviLodafEg3I,62346
|
|
139
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_arabic_dict.txt,sha256=f5L327m3WkeHqDv7T20UqKtRVSUTDJ1AqQNvYc9pmek,2369
|
|
140
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_cyrillic_dict.txt,sha256=20CqUs6xEgVb6AxpSv32VdXSxPeHNwRSTMFqRHypE7o,2781
|
|
141
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_devanagari_dict.txt,sha256=CcdEC_xUd-XEEFIwS2sYWv-MSl6LK0wjwccG9v4e6fw,1943
|
|
142
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_dict.txt,sha256=0Zeen3lMRkwNLgtwp_4U3ZeOncZEwOcfFBWM34NCrxs,74012
|
|
143
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_el_dict.txt,sha256=Md78YsDDrTZ0qC2mGSImorqY70_wFKcEXLiNWfnD3jE,1103
|
|
144
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_en_dict.txt,sha256=4CWmbTHzJ7oMIy4D9AeujRBeHnCefMs_QIqneMJOcNY,1416
|
|
145
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_eslav_dict.txt,sha256=PpXxWBVXFihwys26WvkaTGviiQcQ05Www8dXjn7l5us,1663
|
|
146
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_korean_dict.txt,sha256=qIBxxowBcHSJuqeevgQFt761zKIp9PyUzD75kjKIAtc,47451
|
|
147
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_latin_dict.txt,sha256=PAqKebYSZTwl92UnFxT3EoHk6VWWLBU-Jyt7jB0rE_8,1634
|
|
148
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_ta_dict.txt,sha256=hbVBNSrhjca6bUcVLYv4rf9rAmbmBdLu8pkMG_RmEXs,1723
|
|
149
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_te_dict.txt,sha256=Qvg_XT_bUHeOT6W2bFjZmlmrd5IVHF5080uP_XthydY,1831
|
|
150
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/ppocrv5_th_dict.txt,sha256=V_VAb5S7Zoj7cHf3vmXwi71xzs9IwB6ibFIstcSDa3o,1767
|
|
151
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/ta_dict.txt,sha256=6T5pSBSv2f8ekYtvS7Qmf7TGWpNE7l10ZPkTW5DAonA,352
|
|
152
|
+
mineru/model/utils/pytorchocr/utils/resources/dict/te_dict.txt,sha256=7plGpg13AZd0dOiYg2lKTKIOqjhoojM0v3lA3NAI8Pk,429
|
|
153
|
+
mineru/model/utils/tools/__init__.py,sha256=xEqR65Z8YOzOLorLjK0LCHos2zX-tCuxSrxndjU00hE,49
|
|
154
|
+
mineru/model/utils/tools/infer/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
155
|
+
mineru/model/utils/tools/infer/predict_cls.py,sha256=8RmKl1vejnZl65caHZNV2ta6hMsg5B_LE-FuqCO8T8A,4225
|
|
156
|
+
mineru/model/utils/tools/infer/predict_det.py,sha256=vYQREn7vELXxBsr72CCCVvm1gwV82ONaCwGfxUIjne8,13621
|
|
157
|
+
mineru/model/utils/tools/infer/predict_rec.py,sha256=-BH93JDisu0kT6CyHA4plUOKcb2L-UvDk7Tein5uwt4,19209
|
|
158
|
+
mineru/model/utils/tools/infer/predict_system.py,sha256=hkegkn6hq2v2zqHVAP615-k-fkTS8swRYSbZeoqmSI8,3822
|
|
159
|
+
mineru/model/utils/tools/infer/pytorchocr_utility.py,sha256=i1PFN-_kefJUUZ4Vk7igs1TU8gfErTDlDXY6-8Uaurw,9323
|
|
160
|
+
mineru/model/vlm_vllm_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
161
|
+
mineru/model/vlm_vllm_model/server.py,sha256=nv51j9yAa-u4iFGy4Idh4-viM4sqLHvzs3Lk5w-Cfxg,2105
|
|
162
|
+
mineru/resources/header.html,sha256=PUselBXLBn8gfeP3zwEtj6zIxfhcCN4vN_B796nQFNQ,4410
|
|
163
|
+
mineru/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
|
|
164
|
+
mineru/utils/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
|
|
165
|
+
mineru/utils/block_pre_proc.py,sha256=uGBmxf2MR9bplTnQI8xHjCI-kj3plRhJr0hcWKidbOQ,9632
|
|
166
|
+
mineru/utils/block_sort.py,sha256=mViceDw3O2ksBDFxt-wmX67bCZOwKyp68yZnEjS3Ijc,12934
|
|
167
|
+
mineru/utils/boxbase.py,sha256=moP660AmZq_udHEsfvFkTQdJ4gjrrBwN7t0Enx7CIL8,6903
|
|
168
|
+
mineru/utils/cli_parser.py,sha256=4seFAu1kulsYnw6WM2q_cxgEOt2tErZVkI-LNEF_kGw,1445
|
|
169
|
+
mineru/utils/config_reader.py,sha256=IRVWTpBnbnRpck6eXZUKw-fcLt7hon5S4uqWW-RBb1w,4075
|
|
170
|
+
mineru/utils/cut_image.py,sha256=g3m4nfcJNWlxi-P0kpXTtlmspXkMcLCfGwmYuQ-Z2hE,751
|
|
171
|
+
mineru/utils/draw_bbox.py,sha256=FkgppjUzRhN-uxvChdkhHXcDavJEaApMD6qC6qoRwfQ,20292
|
|
172
|
+
mineru/utils/enum_class.py,sha256=-_Ey03vGNEQHkl6x7pZ43GgrakwhSCOa1RXdr1m-I3A,2503
|
|
173
|
+
mineru/utils/format_utils.py,sha256=2s89vHcSISjuolk8Hvg3K-5-rRbiT3Us7eFLzUKrNKs,10233
|
|
174
|
+
mineru/utils/guess_suffix_or_lang.py,sha256=nznyQpUn1BSA8JNw9HuG3pVV-xtVAtrtcGuHZ-VXt9M,856
|
|
175
|
+
mineru/utils/hash_utils.py,sha256=UPS_8NRBmVumdyOv16Lmv6Ly2xK8OVDJEe5gG6gKIFk,857
|
|
176
|
+
mineru/utils/language.py,sha256=7RT3mxSa7jdpoC5ySd7ZddHA7TO7UsnmDOWiYZAxuyg,1433
|
|
177
|
+
mineru/utils/llm_aided.py,sha256=eBGKCD7cJBjkyn38yqCdh0S-fgRG9fLuQCByLDQuyWs,4983
|
|
178
|
+
mineru/utils/magic_model_utils.py,sha256=2xOvi4oqg3MSw1FUrJTnYDtWeFrrm6qbmlEorLZSaYs,5650
|
|
179
|
+
mineru/utils/model_utils.py,sha256=6OsgFLsABX5JuShSzCMSNHWV-yi-1cjwHweafyxIgRo,18448
|
|
180
|
+
mineru/utils/models_download_utils.py,sha256=UfjvwhxO6BkJHa5JSpEVNZ71GoLMPMmJpym3THET2T4,2957
|
|
181
|
+
mineru/utils/ocr_utils.py,sha256=lPIrwNUib5mrzUkponRYHuUCdjV2qvETNLSzOLyflrU,15990
|
|
182
|
+
mineru/utils/pdf_classify.py,sha256=6DF5pH_9Uq83fsFtp7n4i-OdYQGzoNOV9L0VBUhgBMQ,8078
|
|
183
|
+
mineru/utils/pdf_image_tools.py,sha256=mioLEHOdDtM1YbspNaa0wWhnLw_4-H7rdHlIM40vrT4,4077
|
|
184
|
+
mineru/utils/pdf_reader.py,sha256=WeINm5SyWBUXT0wP9lzIbeHs8P6WUIkN6nVL5X4LzG4,3267
|
|
185
|
+
mineru/utils/pdf_text_tool.py,sha256=KEztjfdqsIHHuiTEAMAL7Lr1OS3R7Ur-uTqGiCRjReQ,1364
|
|
186
|
+
mineru/utils/run_async.py,sha256=rPeP4BCZerR8VByRDhiYzfZiahLVqoZEBVAS54dAjNg,1286
|
|
187
|
+
mineru/utils/span_block_fix.py,sha256=0eVQjJCrT03woRt9hoh6Uu42Tp1dacfGTv2x3B9qq94,8797
|
|
188
|
+
mineru/utils/span_pre_proc.py,sha256=h41q2uQajI0xQbc_30hqaju1dv3oVYxBAlKgURl8HIc,13692
|
|
189
|
+
mineru/utils/table_merge.py,sha256=d98zNbM1ZQ8V1kUt6RugParNUNPv7DGL-XKIzR3iJVQ,15360
|
|
190
|
+
mineru-2.6.0.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
|
|
191
|
+
mineru-2.6.0.dist-info/METADATA,sha256=dbt-b5mAS6fgkv06-dMemfgqARV02Ji_eCDqZ6SlRD4,68358
|
|
192
|
+
mineru-2.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
193
|
+
mineru-2.6.0.dist-info/entry_points.txt,sha256=luXmbhPiZK_tKlRgWuYOaW_V6EFpG-yJcAevVv9MEqE,252
|
|
194
|
+
mineru-2.6.0.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
|
|
195
|
+
mineru-2.6.0.dist-info/RECORD,,
|