PyPI - mineru - Versions diffs - 2.6.6__py3-none-any.whl → 2.6.8__py3-none-any.whl - Mend

mineru 2.6.6__py3-none-any.whl → 2.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

mineru/backend/vlm/vlm_magic_model.py +10 -3
mineru/backend/vlm/vlm_middle_json_mkcontent.py +283 -2
mineru/cli/common.py +7 -0
mineru/utils/enum_class.py +26 -0
mineru/utils/guess_suffix_or_lang.py +10 -3
mineru/utils/table_merge.py +10 -2
mineru/version.py +1 -1
{mineru-2.6.6.dist-info → mineru-2.6.8.dist-info}/METADATA +4 -1
{mineru-2.6.6.dist-info → mineru-2.6.8.dist-info}/RECORD +13 -13
{mineru-2.6.6.dist-info → mineru-2.6.8.dist-info}/WHEEL +0 -0
{mineru-2.6.6.dist-info → mineru-2.6.8.dist-info}/entry_points.txt +0 -0
{mineru-2.6.6.dist-info → mineru-2.6.8.dist-info}/licenses/LICENSE.md +0 -0
{mineru-2.6.6.dist-info → mineru-2.6.8.dist-info}/top_level.txt +0 -0

mineru/backend/vlm/vlm_magic_model.py CHANGED Viewed

@@ -45,7 +45,7 @@ class MagicModel:
                 continue
             span_type = "unknown"
-            line_type = None
+            code_block_sub_type = None
             guess_lang = None
             if block_type in [
@@ -74,7 +74,7 @@ class MagicModel:
                 span_type = ContentType.TABLE
             elif block_type in ["code", "algorithm"]:
                 block_content = code_content_clean(block_content)
-                line_type = block_type
+                code_block_sub_type = block_type
                 block_type = BlockType.CODE_BODY
                 span_type = ContentType.TEXT
                 guess_lang = guess_language_by_text(block_content)
@@ -82,6 +82,9 @@ class MagicModel:
                 block_type = BlockType.INTERLINE_EQUATION
                 span_type = ContentType.INTERLINE_EQUATION
+            #  code 和 algorithm 类型的块，如果内容中包含行内公式，则需要将块类型切换为algorithm
+            switch_code_to_algorithm = False
             if span_type in ["image", "table"]:
                 span = {
                     "bbox": block_bbox,
@@ -102,6 +105,8 @@ class MagicModel:
                 if block_content and block_content.count("\\(") == block_content.count("\\)") and block_content.count("\\(") > 0:
+                    switch_code_to_algorithm = True
                     # 生成包含文本和公式的span列表
                     spans = []
                     last_end = 0
@@ -160,7 +165,9 @@ class MagicModel:
             # 构造line对象
             if block_type in [BlockType.CODE_BODY]:
-                line = {"bbox": block_bbox, "spans": spans, "extra": {"type": line_type, "guess_lang": guess_lang}}
+                if switch_code_to_algorithm and code_block_sub_type == "code":
+                    code_block_sub_type = "algorithm"
+                line = {"bbox": block_bbox, "spans": spans, "extra": {"type": code_block_sub_type, "guess_lang": guess_lang}}
             else:
                 line = {"bbox": block_bbox, "spans": spans}

mineru/backend/vlm/vlm_middle_json_mkcontent.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import os
+from loguru import logger
 from mineru.utils.config_reader import get_latex_delimiter_config, get_formula_enable, get_table_enable
-from mineru.utils.enum_class import MakeMode, BlockType, ContentType
+from mineru.utils.enum_class import MakeMode, BlockType, ContentType, ContentTypeV2
 latex_delimiters_config = get_latex_delimiter_config()
@@ -234,6 +235,277 @@ def make_blocks_to_content_list(para_block, img_buket_path, page_idx, page_size)
     return para_content
+def make_blocks_to_content_list_v2(para_block, img_buket_path, page_size):
+    para_type = para_block['type']
+    para_content = {}
+    if para_type in [
+        BlockType.HEADER,
+        BlockType.FOOTER,
+        BlockType.ASIDE_TEXT,
+        BlockType.PAGE_NUMBER,
+        BlockType.PAGE_FOOTNOTE,
+    ]:
+        if para_type == BlockType.HEADER:
+            content_type = ContentTypeV2.PAGE_HEADER
+        elif para_type == BlockType.FOOTER:
+            content_type = ContentTypeV2.PAGE_FOOTER
+        elif para_type == BlockType.ASIDE_TEXT:
+            content_type = ContentTypeV2.PAGE_ASIDE_TEXT
+        elif para_type == BlockType.PAGE_NUMBER:
+            content_type = ContentTypeV2.PAGE_NUMBER
+        elif para_type == BlockType.PAGE_FOOTNOTE:
+            content_type = ContentTypeV2.PAGE_FOOTNOTE
+        else:
+            raise ValueError(f"Unknown para_type: {para_type}")
+        para_content = {
+            'type': content_type,
+            'content': {
+                f"{content_type}_content": merge_para_with_text_v2(para_block),
+            }
+        }
+    elif para_type == BlockType.TITLE:
+        title_level = get_title_level(para_block)
+        if title_level != 0:
+            para_content = {
+                'type': ContentTypeV2.TITLE,
+                'content': {
+                    "title_content": merge_para_with_text_v2(para_block),
+                    "level": title_level
+                }
+            }
+        else:
+            para_content = {
+                'type': ContentTypeV2.PARAGRAPH,
+                'content': {
+                    "paragraph_content": merge_para_with_text_v2(para_block),
+                }
+            }
+    elif para_type in [
+        BlockType.TEXT,
+        BlockType.PHONETIC
+    ]:
+        para_content = {
+            'type': ContentTypeV2.PARAGRAPH,
+            'content': {
+                'paragraph_content': merge_para_with_text_v2(para_block),
+            }
+        }
+    elif para_type == BlockType.INTERLINE_EQUATION:
+        image_path, math_content = get_body_data(para_block)
+        para_content = {
+            'type': ContentTypeV2.EQUATION_INTERLINE,
+            'content': {
+                'math_content': math_content,
+                'math_type': 'latex',
+                'image_source': {'path': f"{img_buket_path}/{image_path}"},
+            }
+        }
+    elif para_type == BlockType.IMAGE:
+        image_caption = []
+        image_footnote = []
+        image_path, _ = get_body_data(para_block)
+        image_source = {
+            'path': f"{img_buket_path}/{image_path}",
+        }
+        for block in para_block['blocks']:
+            if block['type'] == BlockType.IMAGE_CAPTION:
+                image_caption.extend(merge_para_with_text_v2(block))
+            if block['type'] == BlockType.IMAGE_FOOTNOTE:
+                image_footnote.extend(merge_para_with_text_v2(block))
+        para_content = {
+            'type': ContentTypeV2.IMAGE,
+            'content': {
+                'image_source': image_source,
+                'image_caption': image_caption,
+                'image_footnote': image_footnote,
+            }
+        }
+    elif para_type == BlockType.TABLE:
+        table_caption = []
+        table_footnote = []
+        image_path, html = get_body_data(para_block)
+        image_source = {
+            'path': f"{img_buket_path}/{image_path}",
+        }
+        if html.count("<table") > 1:
+            table_nest_level = 2
+        else:
+            table_nest_level = 1
+        if (
+                "colspan" in html or
+                "rowspan" in html or
+                table_nest_level > 1
+        ):
+            table_type = ContentTypeV2.TABLE_COMPLEX
+        else:
+            table_type = ContentTypeV2.TABLE_SIMPLE
+        for block in para_block['blocks']:
+            if block['type'] == BlockType.TABLE_CAPTION:
+                table_caption.extend(merge_para_with_text_v2(block))
+            if block['type'] == BlockType.TABLE_FOOTNOTE:
+                table_footnote.extend(merge_para_with_text_v2(block))
+        para_content = {
+            'type': ContentTypeV2.TABLE,
+            'content': {
+                'image_source': image_source,
+                'table_caption': table_caption,
+                'table_footnote': table_footnote,
+                'html': html,
+                'table_type': table_type,
+                'table_nest_level': table_nest_level,
+            }
+        }
+    elif para_type == BlockType.CODE:
+        code_caption = []
+        code_content = []
+        for block in para_block['blocks']:
+            if block['type'] == BlockType.CODE_CAPTION:
+                code_caption.extend(merge_para_with_text_v2(block))
+            if block['type'] == BlockType.CODE_BODY:
+                code_content = merge_para_with_text_v2(block)
+        sub_type = para_block["sub_type"]
+        if sub_type == BlockType.CODE:
+            para_content = {
+                'type': ContentTypeV2.CODE,
+                'content': {
+                    'code_caption': code_caption,
+                    'code_content': code_content,
+                    'code_language': para_block.get('guess_lang', 'txt'),
+                }
+            }
+        elif sub_type == BlockType.ALGORITHM:
+            para_content = {
+                'type': ContentTypeV2.ALGORITHM,
+                'content': {
+                    'algorithm_caption': code_caption,
+                    'algorithm_content': code_content,
+                }
+            }
+        else:
+            raise ValueError(f"Unknown code sub_type: {sub_type}")
+    elif para_type == BlockType.REF_TEXT:
+        para_content = {
+            'type': ContentTypeV2.LIST,
+            'content': {
+                'list_type': ContentTypeV2.LIST_REF,
+                'list_items': [
+                    {
+                        'item_type': 'text',
+                        'item_content': merge_para_with_text_v2(para_block),
+                    }
+                ],
+            }
+        }
+    elif para_type == BlockType.LIST:
+        if 'sub_type' in para_block:
+            if para_block['sub_type'] == BlockType.REF_TEXT:
+                list_type = ContentTypeV2.LIST_REF
+            elif para_block['sub_type'] == BlockType.TEXT:
+                list_type = ContentTypeV2.LIST_TEXT
+            else:
+                raise ValueError(f"Unknown list sub_type: {para_block['sub_type']}")
+        else:
+            list_type = ContentTypeV2.LIST_TEXT
+        list_items = []
+        for block in para_block['blocks']:
+            item_content = merge_para_with_text_v2(block)
+            if item_content:
+                list_items.append({
+                    'item_type': 'text',
+                    'item_content': item_content,
+                })
+        para_content = {
+            'type': ContentTypeV2.LIST,
+            'content': {
+                'list_type': list_type,
+                'list_items': list_items,
+            }
+        }
+    page_width, page_height = page_size
+    para_bbox = para_block.get('bbox')
+    if para_bbox:
+        x0, y0, x1, y1 = para_bbox
+        para_content['bbox'] = [
+            int(x0 * 1000 / page_width),
+            int(y0 * 1000 / page_height),
+            int(x1 * 1000 / page_width),
+            int(y1 * 1000 / page_height),
+        ]
+    return para_content
+def get_body_data(para_block):
+    """
+    Extract image_path and html from para_block
+    Returns:
+        - For IMAGE/INTERLINE_EQUATION: (image_path, '')
+        - For TABLE: (image_path, html)
+        - Default: ('', '')
+    """
+    def get_data_from_spans(lines):
+        for line in lines:
+            for span in line.get('spans', []):
+                span_type = span.get('type')
+                if span_type == ContentType.TABLE:
+                    return span.get('image_path', ''), span.get('html', '')
+                elif span_type == ContentType.IMAGE:
+                    return span.get('image_path', ''), ''
+                elif span_type == ContentType.INTERLINE_EQUATION:
+                    return span.get('image_path', ''), span.get('content', '')
+                elif span_type == ContentType.TEXT:
+                    return '', span.get('content', '')
+        return '', ''
+    # 处理嵌套的 blocks 结构
+    if 'blocks' in para_block:
+        for block in para_block['blocks']:
+            block_type = block.get('type')
+            if block_type in [BlockType.IMAGE_BODY, BlockType.TABLE_BODY, BlockType.CODE_BODY]:
+                result = get_data_from_spans(block.get('lines', []))
+                if result != ('', ''):
+                    return result
+        return '', ''
+    # 处理直接包含 lines 的结构
+    return get_data_from_spans(para_block.get('lines', []))
+def merge_para_with_text_v2(para_block):
+    para_content = []
+    para_type = para_block['type']
+    for line in para_block['lines']:
+        for span in line['spans']:
+            span_type = span['type']
+            if span.get("content", '').strip():
+                if para_type == BlockType.PHONETIC and span_type == ContentTypeV2.SPAN_TEXT:
+                    span_type = ContentTypeV2.SPAN_PHONETIC
+                if span_type == ContentType.INLINE_EQUATION:
+                    span_type = ContentTypeV2.SPAN_EQUATION_INLINE
+                if span_type in [
+                    ContentTypeV2.SPAN_TEXT,
+                    ContentTypeV2.SPAN_PHONETIC,
+                    ContentTypeV2.SPAN_EQUATION_INLINE,
+                    ContentTypeV2.SPAN_MD,
+                    ContentTypeV2.SPAN_CODE_INLINE,
+                ]:
+                    span_content = {
+                        'type': span_type,
+                        'content': span['content'],
+                    }
+                    para_content.append(span_content)
+                else:
+                    logger.warning(f"Unknown span type in merge_para_with_text_v2: {span_type}")
+    return para_content
 def union_make(pdf_info_dict: list,
                make_mode: str,
                img_buket_path: str = '',
@@ -260,10 +532,19 @@ def union_make(pdf_info_dict: list,
             for para_block in para_blocks:
                 para_content = make_blocks_to_content_list(para_block, img_buket_path, page_idx, page_size)
                 output_content.append(para_content)
+        elif make_mode == MakeMode.CONTENT_LIST_V2:
+            # https://github.com/drunkpig/llm-webkit-mirror/blob/dev6/docs/specification/output_format/content_list_spec.md
+            para_blocks = (paras_of_layout or []) + (paras_of_discarded or [])
+            page_contents = []
+            if para_blocks:
+                for para_block in para_blocks:
+                    para_content = make_blocks_to_content_list_v2(para_block, img_buket_path, page_size)
+                    page_contents.append(para_content)
+            output_content.append(page_contents)
     if make_mode in [MakeMode.MM_MD, MakeMode.NLP_MD]:
         return '\n\n'.join(output_content)
-    elif make_mode == MakeMode.CONTENT_LIST:
+    elif make_mode in [MakeMode.CONTENT_LIST, MakeMode.CONTENT_LIST_V2]:
         return output_content
     return None

mineru/cli/common.py CHANGED Viewed

@@ -144,6 +144,13 @@ def _process_output(
             f"{pdf_file_name}_content_list.json",
             json.dumps(content_list, ensure_ascii=False, indent=4),
         )
+        if not is_pipeline:
+            content_list_v2 = make_func(pdf_info, MakeMode.CONTENT_LIST_V2, image_dir)
+            md_writer.write_string(
+                f"{pdf_file_name}_content_list_v2.json",
+                json.dumps(content_list_v2, ensure_ascii=False, indent=4),
+            )
     if f_dump_middle_json:
         md_writer.write_string(

mineru/utils/enum_class.py CHANGED Viewed

@@ -38,6 +38,31 @@ class ContentType:
     CODE = 'code'
+class ContentTypeV2:
+    CODE = 'code'
+    ALGORITHM = "algorithm"
+    EQUATION_INTERLINE = 'equation_interline'
+    IMAGE = 'image'
+    TABLE = 'table'
+    TABLE_SIMPLE = 'simple_table'
+    TABLE_COMPLEX = 'complex_table'
+    LIST = 'list'
+    LIST_TEXT = 'text_list'
+    LIST_REF = 'reference_list'
+    TITLE = 'title'
+    PARAGRAPH = 'paragraph'
+    SPAN_TEXT = 'text'
+    SPAN_EQUATION_INLINE = 'equation_inline'
+    SPAN_PHONETIC = 'phonetic'
+    SPAN_MD = 'md'
+    SPAN_CODE_INLINE = 'code_inline'
+    PAGE_HEADER = "page_header"
+    PAGE_FOOTER = "page_footer"
+    PAGE_NUMBER = "page_number"
+    PAGE_ASIDE_TEXT = "page_aside_text"
+    PAGE_FOOTNOTE = "page_footnote"
 class CategoryId:
     Title = 0
     Text = 1
@@ -60,6 +85,7 @@ class MakeMode:
     MM_MD = 'mm_markdown'
     NLP_MD = 'nlp_markdown'
     CONTENT_LIST = 'content_list'
+    CONTENT_LIST_V2 = 'content_list_v2'
 class ModelPath:

mineru/utils/guess_suffix_or_lang.py CHANGED Viewed

@@ -1,9 +1,11 @@
 from pathlib import Path
+from loguru import logger
 from magika import Magika
 DEFAULT_LANG = "txt"
+PDF_SIG_BYTES = b'%PDF'
 magika = Magika()
 def guess_language_by_text(code):
@@ -14,7 +16,7 @@ def guess_language_by_text(code):
 def guess_suffix_by_bytes(file_bytes, file_path=None) -> str:
     suffix = magika.identify_bytes(file_bytes).prediction.output.label
-    if file_path and suffix in ["ai"] and Path(file_path).suffix.lower() in [".pdf"]:
+    if file_path and suffix in ["ai", "html"] and Path(file_path).suffix.lower() in [".pdf"] and file_bytes[:4] == PDF_SIG_BYTES:
         suffix = "pdf"
     return suffix
@@ -23,6 +25,11 @@ def guess_suffix_by_path(file_path) -> str:
     if not isinstance(file_path, Path):
         file_path = Path(file_path)
     suffix = magika.identify_path(file_path).prediction.output.label
-    if suffix in ["ai"] and file_path.suffix.lower() in [".pdf"]:
-        suffix = "pdf"
+    if suffix in ["ai", "html"] and file_path.suffix.lower() in [".pdf"]:
+        try:
+            with open(file_path, 'rb') as f:
+                if f.read(4) == PDF_SIG_BYTES:
+                    suffix = "pdf"
+        except Exception as e:
+            logger.warning(f"Failed to read file {file_path} for PDF signature check: {e}")
     return suffix

mineru/utils/table_merge.py CHANGED Viewed

@@ -7,6 +7,9 @@ from mineru.backend.vlm.vlm_middle_json_mkcontent import merge_para_with_text
 from mineru.utils.enum_class import BlockType, SplitFlag
+CONTINUATION_MARKERS = ["(续)", "(续表)", "(continued)", "(cont.)"]
 def full_to_half(text: str) -> str:
     """Convert full-width characters to half-width characters using code point manipulation.
@@ -174,8 +177,13 @@ def can_merge_tables(current_table_block, previous_table_block):
     # 如果有TABLE_CAPTION类型的块,检查是否至少有一个以"(续)"结尾
     caption_blocks = [block for block in current_table_block["blocks"] if block["type"] == BlockType.TABLE_CAPTION]
     if caption_blocks:
-        # 如果所有caption都不以"(续)"结尾,则不合并
-        if not any(full_to_half(merge_para_with_text(block).strip()).endswith("(续)") for block in caption_blocks):
+        # 如果所有caption都不以"(续)"、"(续表)"、"(continued)"或"(cont.)"结尾,则不合并
+        if not any(
+                any(full_to_half(merge_para_with_text(block).strip()).lower().endswith(marker.lower())
+                    for marker in CONTINUATION_MARKERS)
+                for block in caption_blocks
+        ):
             return False, None, None, None, None
     if any(block["type"] == BlockType.TABLE_FOOTNOTE for block in previous_table_block["blocks"]):

mineru/version.py CHANGED Viewed

@@ -1 +1 @@
-__version__ = "2.6.6"
+__version__ = "2.6.8"

{mineru-2.6.6.dist-info → mineru-2.6.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mineru
-Version: 2.6.6
+Version: 2.6.8
 Summary: A practical tool for converting PDF to Markdown
 License: AGPL-3.0
 Project-URL: homepage, https://mineru.net/
@@ -135,6 +135,9 @@ Dynamic: license-file
 # Changelog
+- 2025/12/12 2.6.7 Release
+  - Bug fix: #4168
 - 2025/12/02 2.6.6 Release
   - `mineru-api` tool optimizations
     - Added descriptive text to `mineru-api` interface parameters to improve API documentation readability.

{mineru-2.6.6.dist-info → mineru-2.6.8.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 mineru/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
-mineru/version.py,sha256=MJHGx-Qo0nycI7WHSavnK8Mok6HS_De_qLfGWXih6Og,22
+mineru/version.py,sha256=3WRCYKawwH5XZgRMQ8fXEoGyCz5YorbyTfOWuY9SV3g,22
 mineru/backend/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
 mineru/backend/utils.py,sha256=GLJU3IznDmhE1_qNmkU1UOtsuskIHBezgsEVO6Uar-Y,698
 mineru/backend/pipeline/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
@@ -15,11 +15,11 @@ mineru/backend/vlm/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5
 mineru/backend/vlm/model_output_to_middle_json.py,sha256=AqYX44gS9crUO_t7SuUatD71EVjow6pI6yA2Ik3gQ0s,5139
 mineru/backend/vlm/utils.py,sha256=JMgS3SMFcHJYH2jIx-Xhs-P2a1bmT8U6Kn60IL0OmQA,3570
 mineru/backend/vlm/vlm_analyze.py,sha256=wP3vuYGVec0hRsDAuzfSm2HD4Muu7wSWL767qxd_yqw,11690
-mineru/backend/vlm/vlm_magic_model.py,sha256=Pd0sOr7G1crAJIVeq6h_03gNSuxmV5U8dvGTGT_rrjs,23452
-mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=5V-AU9KkxxMn0DDSQBrb15I4GVpEyiQy8uNI_tQhS6M,13498
+mineru/backend/vlm/vlm_magic_model.py,sha256=e8BWkfeRkZNJjFdm9oPmvIs9ATjdzCy_5OZw2qTziZA,23839
+mineru/backend/vlm/vlm_middle_json_mkcontent.py,sha256=Oc2w2BSRXeJPKVcM1Bkb2LjLNTQAG5yUYRtF7YNBTss,24070
 mineru/cli/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
 mineru/cli/client.py,sha256=XSEIr4klUuufMAWn5IioZdXpg1xAxqRZF0HkaVIhxh0,6815
-mineru/cli/common.py,sha256=zhNOJCOnTSMbWdUWSZG-nf0odv5vBRtdZYZ1UbUPH3g,14369
+mineru/cli/common.py,sha256=Rld3P4FzbTYngy3BP-Irto9aMS7EPq2lIWgeIF1xPMc,14667
 mineru/cli/fast_api.py,sha256=lLxQKKHmD8ruoZGcE6LrXzr3pQIxvw8OdJrQq_FNLSM,14447
 mineru/cli/gradio_app.py,sha256=EUPuRHHCOECrE3E3VNEeuMDYeC3nicurOYfk8YJSOMw,15646
 mineru/cli/models_download.py,sha256=LNfoIpUlJM7m7qb2SiCxtjMDw4jILBQtZwNP2JoY81U,4815
@@ -159,9 +159,9 @@ mineru/utils/cli_parser.py,sha256=4seFAu1kulsYnw6WM2q_cxgEOt2tErZVkI-LNEF_kGw,14
 mineru/utils/config_reader.py,sha256=IRVWTpBnbnRpck6eXZUKw-fcLt7hon5S4uqWW-RBb1w,4075
 mineru/utils/cut_image.py,sha256=g3m4nfcJNWlxi-P0kpXTtlmspXkMcLCfGwmYuQ-Z2hE,751
 mineru/utils/draw_bbox.py,sha256=FkgppjUzRhN-uxvChdkhHXcDavJEaApMD6qC6qoRwfQ,20292
-mineru/utils/enum_class.py,sha256=-_Ey03vGNEQHkl6x7pZ43GgrakwhSCOa1RXdr1m-I3A,2503
+mineru/utils/enum_class.py,sha256=NO9FiM2Ni6-SI1rzMm5X98ssNDxgJTyykZddJSscgJs,3221
 mineru/utils/format_utils.py,sha256=2s89vHcSISjuolk8Hvg3K-5-rRbiT3Us7eFLzUKrNKs,10233
-mineru/utils/guess_suffix_or_lang.py,sha256=nznyQpUn1BSA8JNw9HuG3pVV-xtVAtrtcGuHZ-VXt9M,856
+mineru/utils/guess_suffix_or_lang.py,sha256=aUC2wAJwa5LH0SHxwTbOEJqVVgvpdUCWFF6of8eDVkc,1198
 mineru/utils/hash_utils.py,sha256=UPS_8NRBmVumdyOv16Lmv6Ly2xK8OVDJEe5gG6gKIFk,857
 mineru/utils/language.py,sha256=7RT3mxSa7jdpoC5ySd7ZddHA7TO7UsnmDOWiYZAxuyg,1433
 mineru/utils/llm_aided.py,sha256=9WUytvxenSAuaWR4sTQhVPQ5h8pY0wVOH1O2sj_6dLs,5149
@@ -178,10 +178,10 @@ mineru/utils/pdf_text_tool.py,sha256=KEztjfdqsIHHuiTEAMAL7Lr1OS3R7Ur-uTqGiCRjReQ
 mineru/utils/run_async.py,sha256=rPeP4BCZerR8VByRDhiYzfZiahLVqoZEBVAS54dAjNg,1286
 mineru/utils/span_block_fix.py,sha256=0eVQjJCrT03woRt9hoh6Uu42Tp1dacfGTv2x3B9qq94,8797
 mineru/utils/span_pre_proc.py,sha256=h41q2uQajI0xQbc_30hqaju1dv3oVYxBAlKgURl8HIc,13692
-mineru/utils/table_merge.py,sha256=d98zNbM1ZQ8V1kUt6RugParNUNPv7DGL-XKIzR3iJVQ,15360
-mineru-2.6.6.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
-mineru-2.6.6.dist-info/METADATA,sha256=9f-9lcSQXdLCxbYmHItJbLgDc-TZG7u7dVUWMS0SzXA,73095
-mineru-2.6.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mineru-2.6.6.dist-info/entry_points.txt,sha256=JbtrCPhx1T32s7TONUsteKg-24ZwRT1HSiFtW5jypVw,376
-mineru-2.6.6.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
-mineru-2.6.6.dist-info/RECORD,,
+mineru/utils/table_merge.py,sha256=pZHP0mIUcWPcdu0HecZDEFZ_ms_rT7C43m0Eg2iEEF4,15592
+mineru-2.6.8.dist-info/licenses/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
+mineru-2.6.8.dist-info/METADATA,sha256=nbMEGfGHC_Z7OUDegE18y0lrzSXaQCTtpWDYi_KZVSE,73142
+mineru-2.6.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mineru-2.6.8.dist-info/entry_points.txt,sha256=JbtrCPhx1T32s7TONUsteKg-24ZwRT1HSiFtW5jypVw,376
+mineru-2.6.8.dist-info/top_level.txt,sha256=zuGQfZcbsHv4I4oKI9gaKPqEWBFm6xJroKuug2LnKP8,7
+mineru-2.6.8.dist-info/RECORD,,

{mineru-2.6.6.dist-info → mineru-2.6.8.dist-info}/WHEEL RENAMED Viewed

File without changes

{mineru-2.6.6.dist-info → mineru-2.6.8.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{mineru-2.6.6.dist-info → mineru-2.6.8.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

{mineru-2.6.6.dist-info → mineru-2.6.8.dist-info}/top_level.txt RENAMED Viewed

File without changes

mineru 2.6.6__py3-none-any.whl → 2.6.8__py3-none-any.whl

mineru 2.6.6__py3-none-any.whl → 2.6.8__py3-none-any.whl