PyPI - magic-pdf - Versions diffs - 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl - Mend

magic-pdf 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

magic_pdf/dict2md/ocr_mkcontent.py CHANGED Viewed

@@ -138,12 +138,9 @@ def full_to_half(text: str) -> str:
     result = []
     for char in text:
         code = ord(char)
-        # Full-width ASCII variants (FF01-FF5E)
-        if 0xFF01 <= code <= 0xFF5E:
+        # Full-width letters and numbers (FF21-FF3A for A-Z, FF41-FF5A for a-z, FF10-FF19 for 0-9)
+        if (0xFF21 <= code <= 0xFF3A) or (0xFF41 <= code <= 0xFF5A) or (0xFF10 <= code <= 0xFF19):
             result.append(chr(code - 0xFEE0))  # Shift to ASCII range
-        # Full-width space
-        elif code == 0x3000:
-            result.append(' ')
         else:
             result.append(char)
     return ''.join(result)

magic_pdf/libs/performance_stats.py ADDED Viewed

@@ -0,0 +1,54 @@
+import time
+import functools
+from collections import defaultdict
+from typing import Dict, List
+class PerformanceStats:
+    """性能统计类，用于收集和展示方法执行时间"""
+    _stats: Dict[str, List[float]] = defaultdict(list)
+    @classmethod
+    def add_execution_time(cls, func_name: str, execution_time: float):
+        """添加执行时间记录"""
+        cls._stats[func_name].append(execution_time)
+    @classmethod
+    def get_stats(cls) -> Dict[str, dict]:
+        """获取统计结果"""
+        results = {}
+        for func_name, times in cls._stats.items():
+            results[func_name] = {
+                'count': len(times),
+                'total_time': sum(times),
+                'avg_time': sum(times) / len(times),
+                'min_time': min(times),
+                'max_time': max(times)
+            }
+        return results
+    @classmethod
+    def print_stats(cls):
+        """打印统计结果"""
+        stats = cls.get_stats()
+        print("\n性能统计结果:")
+        print("-" * 80)
+        print(f"{'方法名':<40} {'调用次数':>8} {'总时间(s)':>12} {'平均时间(s)':>12}")
+        print("-" * 80)
+        for func_name, data in stats.items():
+            print(f"{func_name:<40} {data['count']:8d} {data['total_time']:12.6f} {data['avg_time']:12.6f}")
+def measure_time(func):
+    """测量方法执行时间的装饰器"""
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        execution_time = time.time() - start_time
+        PerformanceStats.add_execution_time(func.__name__, execution_time)
+        return result
+    return wrapper

magic_pdf/libs/version.py CHANGED Viewed

@@ -1 +1 @@
-__version__ = "1.2.0"
+__version__ = "1.2.2"

magic_pdf/model/doc_analyze_by_custom_model.py CHANGED Viewed

@@ -170,11 +170,7 @@ def doc_analyze(
         gpu_memory = int(os.getenv("VIRTUAL_VRAM_SIZE", round(get_vram(device))))
         if gpu_memory is not None and gpu_memory >= 8:
-            if gpu_memory >= 40:
-                batch_ratio = 32
-            elif gpu_memory >=20:
-                batch_ratio = 16
-            elif gpu_memory >= 16:
+            if gpu_memory >= 16:
                 batch_ratio = 8
             elif gpu_memory >= 10:
                 batch_ratio = 4

magic_pdf/model/magic_model.py CHANGED Viewed

@@ -528,14 +528,13 @@ class MagicModel:
             pair_dis = bbox_distance(subjects[sub_idx]['bbox'], objects[obj_idx]['bbox'])
             nearest_dis = float('inf')
             for i in range(N):
-                if i in seen_idx:continue
+                if i in seen_idx or i == sub_idx:continue
                 nearest_dis = min(nearest_dis, bbox_distance(subjects[i]['bbox'], objects[obj_idx]['bbox']))
             if pair_dis >= 3*nearest_dis:
                 seen_idx.add(sub_idx)
                 continue
             seen_idx.add(sub_idx)
             seen_idx.add(obj_idx + OBJ_IDX_OFFSET)
             seen_sub_idx.add(sub_idx)

magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py CHANGED Viewed

@@ -100,20 +100,61 @@ class UnimernetModel(object):
             res["latex"] = latex_rm_whitespace(latex)
         return formula_list
-    def batch_predict(
-        self, images_mfd_res: list, images: list, batch_size: int = 64
-    ) -> list:
+    # def batch_predict(
+    #     self, images_mfd_res: list, images: list, batch_size: int = 64
+    # ) -> list:
+    #     images_formula_list = []
+    #     mf_image_list = []
+    #     backfill_list = []
+    #     for image_index in range(len(images_mfd_res)):
+    #         mfd_res = images_mfd_res[image_index]
+    #         pil_img = Image.fromarray(images[image_index])
+    #         formula_list = []
+    #
+    #         for xyxy, conf, cla in zip(
+    #             mfd_res.boxes.xyxy, mfd_res.boxes.conf, mfd_res.boxes.cls
+    #         ):
+    #             xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy]
+    #             new_item = {
+    #                 "category_id": 13 + int(cla.item()),
+    #                 "poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax],
+    #                 "score": round(float(conf.item()), 2),
+    #                 "latex": "",
+    #             }
+    #             formula_list.append(new_item)
+    #             bbox_img = pil_img.crop((xmin, ymin, xmax, ymax))
+    #             mf_image_list.append(bbox_img)
+    #
+    #         images_formula_list.append(formula_list)
+    #         backfill_list += formula_list
+    #
+    #     dataset = MathDataset(mf_image_list, transform=self.mfr_transform)
+    #     dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=0)
+    #     mfr_res = []
+    #     for mf_img in dataloader:
+    #         mf_img = mf_img.to(self.device)
+    #         with torch.no_grad():
+    #             output = self.model.generate({"image": mf_img})
+    #         mfr_res.extend(output["pred_str"])
+    #     for res, latex in zip(backfill_list, mfr_res):
+    #         res["latex"] = latex_rm_whitespace(latex)
+    #     return images_formula_list
+    def batch_predict(self, images_mfd_res: list, images: list, batch_size: int = 64) -> list:
         images_formula_list = []
         mf_image_list = []
         backfill_list = []
+        image_info = []  # Store (area, original_index, image) tuples
+        # Collect images with their original indices
         for image_index in range(len(images_mfd_res)):
             mfd_res = images_mfd_res[image_index]
             pil_img = Image.fromarray(images[image_index])
             formula_list = []
-            for xyxy, conf, cla in zip(
-                mfd_res.boxes.xyxy, mfd_res.boxes.conf, mfd_res.boxes.cls
-            ):
+            for idx, (xyxy, conf, cla) in enumerate(zip(
+                    mfd_res.boxes.xyxy, mfd_res.boxes.conf, mfd_res.boxes.cls
+            )):
                 xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy]
                 new_item = {
                     "category_id": 13 + int(cla.item()),
@@ -123,19 +164,43 @@ class UnimernetModel(object):
                 }
                 formula_list.append(new_item)
                 bbox_img = pil_img.crop((xmin, ymin, xmax, ymax))
+                area = (xmax - xmin) * (ymax - ymin)
+                curr_idx = len(mf_image_list)
+                image_info.append((area, curr_idx, bbox_img))
                 mf_image_list.append(bbox_img)
             images_formula_list.append(formula_list)
             backfill_list += formula_list
-        dataset = MathDataset(mf_image_list, transform=self.mfr_transform)
+        # Stable sort by area
+        image_info.sort(key=lambda x: x[0])  # sort by area
+        sorted_indices = [x[1] for x in image_info]
+        sorted_images = [x[2] for x in image_info]
+        # Create mapping for results
+        index_mapping = {new_idx: old_idx for new_idx, old_idx in enumerate(sorted_indices)}
+        # Create dataset with sorted images
+        dataset = MathDataset(sorted_images, transform=self.mfr_transform)
         dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=0)
+        # Process batches and store results
         mfr_res = []
         for mf_img in dataloader:
             mf_img = mf_img.to(self.device)
             with torch.no_grad():
                 output = self.model.generate({"image": mf_img})
             mfr_res.extend(output["pred_str"])
-        for res, latex in zip(backfill_list, mfr_res):
-            res["latex"] = latex_rm_whitespace(latex)
+        # Restore original order
+        unsorted_results = [""] * len(mfr_res)
+        for new_idx, latex in enumerate(mfr_res):
+            original_idx = index_mapping[new_idx]
+            unsorted_results[original_idx] = latex_rm_whitespace(latex)
+        # Fill results back
+        for res, latex in zip(backfill_list, unsorted_results):
+            res["latex"] = latex
         return images_formula_list

magic_pdf/pdf_parse_union_core_v2.py CHANGED Viewed

@@ -21,9 +21,12 @@ from magic_pdf.libs.config_reader import get_local_layoutreader_model_dir, get_l
 from magic_pdf.libs.convert_utils import dict_to_list
 from magic_pdf.libs.hash_utils import compute_md5
 from magic_pdf.libs.pdf_image_tools import cut_image_to_pil_image
+from magic_pdf.libs.performance_stats import measure_time, PerformanceStats
 from magic_pdf.model.magic_model import MagicModel
 from magic_pdf.post_proc.llm_aided import llm_aided_formula, llm_aided_text, llm_aided_title
+from concurrent.futures import ThreadPoolExecutor
 try:
     import torchtext
@@ -215,7 +218,7 @@ def calculate_contrast(img, img_mode) -> float:
     # logger.info(f"contrast: {contrast}")
     return round(contrast, 2)
+# @measure_time
 def txt_spans_extract_v2(pdf_page, spans, all_bboxes, all_discarded_blocks, lang):
     # cid用0xfffd表示，连字符拆开
     # text_blocks_raw = pdf_page.get_text('rawdict', flags=fitz.TEXT_PRESERVE_WHITESPACE | fitz.TEXT_MEDIABOX_CLIP)['blocks']
@@ -489,7 +492,7 @@ def insert_lines_into_block(block_bbox, line_height, page_w, page_h):
     else:
         return [[x0, y0, x1, y1]]
+# @measure_time
 def sort_lines_by_model(fix_blocks, page_w, page_h, line_height):
     page_line_list = []
@@ -923,7 +926,6 @@ def pdf_parse_union(
     magic_model = MagicModel(model_list, dataset)
     """根据输入的起始范围解析pdf"""
-    # end_page_id = end_page_id if end_page_id else len(pdf_docs) - 1
     end_page_id = (
         end_page_id
         if end_page_id is not None and end_page_id >= 0
@@ -960,6 +962,8 @@ def pdf_parse_union(
             )
         pdf_info_dict[f'page_{page_id}'] = page_info
+    # PerformanceStats.print_stats()
     """分段"""
     para_split(pdf_info_dict)

magic_pdf/post_proc/para_split_v3.py CHANGED Viewed

@@ -108,29 +108,32 @@ def __is_list_or_index_block(block):
         ):
             multiple_para_flag = True
-        for line in block['lines']:
-            line_mid_x = (line['bbox'][0] + line['bbox'][2]) / 2
-            block_mid_x = (block['bbox_fs'][0] + block['bbox_fs'][2]) / 2
-            if (
-                line['bbox'][0] - block['bbox_fs'][0] > 0.7 * line_height
-                and block['bbox_fs'][2] - line['bbox'][2] > 0.7 * line_height
-            ):
-                external_sides_not_close_num += 1
-            if abs(line_mid_x - block_mid_x) < line_height / 2:
-                center_close_num += 1
+        block_text = ''
+        for line in block['lines']:
             line_text = ''
             for span in line['spans']:
                 span_type = span['type']
                 if span_type == ContentType.Text:
                     line_text += span['content'].strip()
             # 添加所有文本，包括空行，保持与block['lines']长度一致
             lines_text_list.append(line_text)
             block_text = ''.join(lines_text_list)
-            block_lang = detect_lang(block_text)
-            # logger.info(f"block_lang: {block_lang}")
+        block_lang = detect_lang(block_text)
+        # logger.info(f"block_lang: {block_lang}")
+        for line in block['lines']:
+            line_mid_x = (line['bbox'][0] + line['bbox'][2]) / 2
+            block_mid_x = (block['bbox_fs'][0] + block['bbox_fs'][2]) / 2
+            if (
+                line['bbox'][0] - block['bbox_fs'][0] > 0.7 * line_height
+                and block['bbox_fs'][2] - line['bbox'][2] > 0.7 * line_height
+            ):
+                external_sides_not_close_num += 1
+            if abs(line_mid_x - block_mid_x) < line_height / 2:
+                center_close_num += 1
             # 计算line左侧顶格数量是否大于2，是否顶格用abs(block['bbox_fs'][0] - line['bbox'][0]) < line_height/2 来判断
             if abs(block['bbox_fs'][0] - line['bbox'][0]) < line_height / 2:

magic_pdf/pre_proc/ocr_dict_merge.py CHANGED Viewed

@@ -64,7 +64,7 @@ def span_block_type_compatible(span_type, block_type):
     if span_type in [ContentType.Text, ContentType.InlineEquation]:
         return block_type in [BlockType.Text, BlockType.Title, BlockType.ImageCaption, BlockType.ImageFootnote, BlockType.TableCaption, BlockType.TableFootnote]
     elif span_type == ContentType.InterlineEquation:
-        return block_type in [BlockType.InterlineEquation]
+        return block_type in [BlockType.InterlineEquation, BlockType.Text]
     elif span_type == ContentType.Image:
         return block_type in [BlockType.ImageBody]
     elif span_type == ContentType.Table:

{magic_pdf-1.2.0.dist-info → magic_pdf-1.2.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: magic-pdf
-Version: 1.2.0
+Version: 1.2.2
 Summary: A practical tool for converting PDF to Markdown
 Home-page: https://github.com/opendatalab/MinerU
 Requires-Python: >=3.9
@@ -9,7 +9,7 @@ License-File: LICENSE.md
 Requires-Dist: boto3>=1.28.43
 Requires-Dist: Brotli>=1.1.0
 Requires-Dist: click>=8.1.7
-Requires-Dist: fast-langdetect>=0.2.3
+Requires-Dist: fast-langdetect<0.3.0,>=0.2.3
 Requires-Dist: loguru>=0.6.0
 Requires-Dist: numpy<2.0.0,>=1.21.6
 Requires-Dist: pydantic>=2.7.2
@@ -94,6 +94,10 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte
 </div>
 # Changelog
+- 2025/03/03 1.2.1 released, fixed several bugs:
+  - Fixed the impact on punctuation marks during full-width to half-width conversion of letters and numbers
+  - Fixed caption matching inaccuracies in certain scenarios
+  - Fixed formula span loss issues in certain scenarios
 - 2025/02/24 1.2.0 released. This version includes several fixes and improvements to enhance parsing efficiency and accuracy:
   - Performance Optimization
     - Increased classification speed for PDF documents in auto mode.

{magic_pdf-1.2.0.dist-info → magic_pdf-1.2.2.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 magic_pdf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-magic_pdf/pdf_parse_union_core_v2.py,sha256=jIrXgU_gKL4toJ6GsCoDxByszaN8mAr5vrEy_c63ewk,38310
+magic_pdf/pdf_parse_union_core_v2.py,sha256=Pt3UtPQgOrF2YudQqrwVVC767_271E-LRg2aUsiggXg,38435
 magic_pdf/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 magic_pdf/config/constants.py,sha256=fXGzANULnJWLPxwYp3BEFWx-rnorzpySMx63ffyEyq4,1272
 magic_pdf/config/drop_reason.py,sha256=CqjMzBE96Qo8OeFvhhhItY8WhyqsKhE3DmyJLoQZNCc,2248
@@ -24,7 +24,7 @@ magic_pdf/data/io/base.py,sha256=SqNQqe30ZvoVvg7GVv-hLMCjN6yBgDyQQWeLgGsTfhQ,111
 magic_pdf/data/io/http.py,sha256=XlKB0DNf4a_uUnfgcclvaaOtmE7lmddx0DnK8A-emAM,958
 magic_pdf/data/io/s3.py,sha256=hyA7sbNriQy64xd_uyJ7acN_oneQ1Pdmoc7_xcvkue8,3606
 magic_pdf/dict2md/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-magic_pdf/dict2md/ocr_mkcontent.py,sha256=ZZTaiIn18OWuWKGbDdpoOZ3VMhe_3_JKwrKCfzDiSk0,13715
+magic_pdf/dict2md/ocr_mkcontent.py,sha256=12WeBVxnBzzruk8CfYqqsV2dpH-mDWmE4Osl1RlRoc8,13741
 magic_pdf/filter/__init__.py,sha256=_7lSez_myu4b6cdzPpQ-NfREuqeBSq_QdyBPKVLyq2U,1505
 magic_pdf/filter/pdf_classify_by_type.py,sha256=YNYXamxYgEiSujwilCNHOtrwpgJGDiQ597qJfardDVc,42354
 magic_pdf/filter/pdf_meta_scan.py,sha256=eOuM0-JgaXvHolSgepGoNDJDmv_uITWLQpH_0MfnVQw,17478
@@ -49,12 +49,13 @@ magic_pdf/libs/markdown_utils.py,sha256=86v2BmsSV4NkoRZrH4uQD1youJhYFF3vIKr_vDeg
 magic_pdf/libs/path_utils.py,sha256=Hykw_l5CU736b2egHV9P7B-qh3QNKO4nZSGCbsi0Z8E,1043
 magic_pdf/libs/pdf_check.py,sha256=7GWWvDR6g_rj_fE6XJlbTq5AFVX11ngRIzT0N18F214,3396
 magic_pdf/libs/pdf_image_tools.py,sha256=kjzSEbm7K0yiHv8kJ4VbZ9HHktM8qvAv3LhxRyDZEQk,1987
+magic_pdf/libs/performance_stats.py,sha256=BFi4NIsUYlanznYoTVq4hBpj4NOuShAlWBHzebBGVYM,1702
 magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
-magic_pdf/libs/version.py,sha256=MpAT5hgNoHnTtG1XRD_GV_A7QrHVU6vJjGSw_8qMGA4,22
+magic_pdf/libs/version.py,sha256=uuf4VNtTNA93fMhoAur9YafzaKJFnczY-H1SSCSuRVQ,22
 magic_pdf/model/__init__.py,sha256=sa-dO2k-TLy25I2gRrzjm_cQeYfzMf-pLwBJHkIxGo0,51
 magic_pdf/model/batch_analyze.py,sha256=sbrgOJWycb1Ep6e62CPi6jEyG6VSeklIxc4PmrqaLhM,11933
-magic_pdf/model/doc_analyze_by_custom_model.py,sha256=wma0aq6RyxAepEqnaiTJ9_pWWKLVBj39c6xWA85dxzA,8068
-magic_pdf/model/magic_model.py,sha256=OcKhSJ_PyAAldgpKPiPxi2uuvnj3Sf4SvXi_5Rv0a6Q,30667
+magic_pdf/model/doc_analyze_by_custom_model.py,sha256=T0-h4QmSIDXRzgF5uWO4jQrwIot221l26PXU52xeKiA,7933
+magic_pdf/model/magic_model.py,sha256=yZKWo_wRck_-YLyFGRiUHGar8sV1Y6458BFLbyBAt74,30682
 magic_pdf/model/model_list.py,sha256=aqfEJlEfbib3D3ISrxc0Coh6SbffYh8Yq2FlQN35_zA,213
 magic_pdf/model/pdf_extract_kit.py,sha256=Rd51VNZPKRA_tUbDss-b44d84K6WDG2S87a37Ax7HUA,12224
 magic_pdf/model/pp_structure_v2.py,sha256=NcqFWL4nUtjl82MFak8HX_8V3i4Aw_fK4dATrIp5uGs,3840
@@ -92,7 +93,7 @@ magic_pdf/model/sub_modules/mfd/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
 magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py,sha256=QfHbMr1br0pOJUu1NJEMgA6yw11G0yFImJv_AfW48_c,1008
 magic_pdf/model/sub_modules/mfd/yolov8/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 magic_pdf/model/sub_modules/mfr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py,sha256=R05qw54QuLl2btNWdkxf4yCjDeEj8o0786e-gz_Xv8k,5290
+magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py,sha256=HPNetRfQeHoHfRTzFEaIjLSHfjrxRvS-EaApMUebZuQ,8020
 magic_pdf/model/sub_modules/mfr/unimernet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 magic_pdf/model/sub_modules/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 magic_pdf/model/sub_modules/ocr/paddleocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -116,12 +117,12 @@ magic_pdf/operators/models.py,sha256=mRqbCVrxxaUVDpEBAsXaK7EL1M-goICkE1W0FYgewio
 magic_pdf/operators/pipes.py,sha256=XgBgisKQd_ruW-3Tw4v5LhqloZUHgn2aFcpi_q8LbCs,6767
 magic_pdf/post_proc/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
 magic_pdf/post_proc/llm_aided.py,sha256=yzhu2cCpUZjdwf3v0swYDgSs9VWIfMAoXepYIP1EMZs,6367
-magic_pdf/post_proc/para_split_v3.py,sha256=v4SdQn4OZdHRXpWQMfQ-FGJz_tglQ88uFUqpwY542Fo,16922
+magic_pdf/post_proc/para_split_v3.py,sha256=SPN_VVGvFX5KpFMGw9OzgoE-kTZq-FF036i0cIImGH8,16975
 magic_pdf/pre_proc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 magic_pdf/pre_proc/construct_page_dict.py,sha256=OFmq5XRKi6fYIo-lmGlL-NB16Sf0egzsfEx-fT2uYrc,660
 magic_pdf/pre_proc/cut_image.py,sha256=NDzbxwD7z7Tb4uAxL4KR6LzURFdN1Tzr4nPvj-VmEqc,1225
 magic_pdf/pre_proc/ocr_detect_all_bboxes.py,sha256=nt88ttXCEI_1ihAF7HU15SQjwM69V-iJmk-L_nyzA6o,9328
-magic_pdf/pre_proc/ocr_dict_merge.py,sha256=vrbLIzNIjxrm7PonfHaFdY6qaicc0uIly62SJwgZ5UM,5496
+magic_pdf/pre_proc/ocr_dict_merge.py,sha256=4Z3aHZ9sxzijkVpOCENslvUcpp7DXgNID4Gl3pxwIg4,5512
 magic_pdf/pre_proc/ocr_span_list_modify.py,sha256=xrgC9vR0poklZuY4Og41pZVdXzuaGFg3BnQ01X60dpo,3102
 magic_pdf/pre_proc/remove_bbox_overlap.py,sha256=mcdxAh4P56NZ3Ij8h3vW8qC_SrszfXflVWuWUuUiTNg,3089
 magic_pdf/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
@@ -138,9 +139,9 @@ magic_pdf/tools/common.py,sha256=1LfMeXBBsb3WlGeNAze_pPOYXQ8Qbfh-JgRXweojHKo,838
 magic_pdf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 magic_pdf/utils/annotations.py,sha256=82ou3uELNbQWa9hOFFkVt0gsIskAKf5msCv5J2IJ5V0,211
 magic_pdf/utils/office_to_pdf.py,sha256=7aj-Ls2v8saD-Rgu_t3FIc-J3Ka9wnmiEH5zY-H1Vxs,729
-magic_pdf-1.2.0.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
-magic_pdf-1.2.0.dist-info/METADATA,sha256=7iel3MItxKhJc1Bbfh_NMbDp8a23k9G1vA8LYEw2k_U,40720
-magic_pdf-1.2.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-magic_pdf-1.2.0.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
-magic_pdf-1.2.0.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
-magic_pdf-1.2.0.dist-info/RECORD,,
+magic_pdf-1.2.2.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
+magic_pdf-1.2.2.dist-info/METADATA,sha256=FYzj0yWzmFAG4mQ22DH9F4KZfqexNg7YuhgiXMHc9Ug,41001
+magic_pdf-1.2.2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+magic_pdf-1.2.2.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
+magic_pdf-1.2.2.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
+magic_pdf-1.2.2.dist-info/RECORD,,

{magic_pdf-1.2.0.dist-info → magic_pdf-1.2.2.dist-info}/LICENSE.md RENAMED Viewed

File without changes

{magic_pdf-1.2.0.dist-info → magic_pdf-1.2.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{magic_pdf-1.2.0.dist-info → magic_pdf-1.2.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{magic_pdf-1.2.0.dist-info → magic_pdf-1.2.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

magic-pdf 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

magic-pdf 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl