magic-pdf 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
magic_pdf/libs/boxbase.py CHANGED
@@ -185,10 +185,13 @@ def calculate_iou(bbox1, bbox2):
     bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
     bbox2_area = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
 
+    if any([bbox1_area == 0, bbox2_area == 0]):
+        return 0
+
     # Compute the intersection over union by taking the intersection area
     # and dividing it by the sum of both areas minus the intersection area
-    iou = intersection_area / float(bbox1_area + bbox2_area -
-                                    intersection_area)
+    iou = intersection_area / float(bbox1_area + bbox2_area - intersection_area)
+
     return iou
 
 
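For orientation, here is a minimal standalone sketch of the guarded IoU computation (the intersection step is our assumption; only the zero-area guard and the final division come from the hunk above). A degenerate zero-area box now yields 0 instead of risking a ZeroDivisionError when both areas are zero:

def calculate_iou_sketch(bbox1, bbox2):
    # Intersection rectangle (assumed; not shown in the hunk).
    x0, y0 = max(bbox1[0], bbox2[0]), max(bbox1[1], bbox2[1])
    x1, y1 = min(bbox1[2], bbox2[2]), min(bbox1[3], bbox2[3])
    intersection_area = max(0, x1 - x0) * max(0, y1 - y0)

    bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
    bbox2_area = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])

    # New in 1.1.0: degenerate boxes short-circuit to 0 instead of dividing by zero.
    if any([bbox1_area == 0, bbox2_area == 0]):
        return 0

    return intersection_area / float(bbox1_area + bbox2_area - intersection_area)

print(calculate_iou_sketch([0, 0, 10, 10], [5, 5, 15, 15]))  # ~0.1429
print(calculate_iou_sketch([3, 3, 3, 3], [3, 3, 3, 3]))      # 0 (previously ZeroDivisionError)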
 
@@ -362,12 +362,24 @@ def draw_line_sort_bbox(pdf_info, pdf_bytes, out_path, filename):
     for page in pdf_info:
         page_line_list = []
         for block in page['preproc_blocks']:
-            if block['type'] in [BlockType.Text, BlockType.Title, BlockType.InterlineEquation]:
+            if block['type'] in [BlockType.Text]:
                 for line in block['lines']:
                     bbox = line['bbox']
                     index = line['index']
                     page_line_list.append({'index': index, 'bbox': bbox})
-            if block['type'] in [BlockType.Image, BlockType.Table]:
+            elif block['type'] in [BlockType.Title, BlockType.InterlineEquation]:
+                if 'virtual_lines' in block:
+                    if len(block['virtual_lines']) > 0 and block['virtual_lines'][0].get('index', None) is not None:
+                        for line in block['virtual_lines']:
+                            bbox = line['bbox']
+                            index = line['index']
+                            page_line_list.append({'index': index, 'bbox': bbox})
+                else:
+                    for line in block['lines']:
+                        bbox = line['bbox']
+                        index = line['index']
+                        page_line_list.append({'index': index, 'bbox': bbox})
+            elif block['type'] in [BlockType.Image, BlockType.Table]:
                 for sub_block in block['blocks']:
                     if sub_block['type'] in [BlockType.ImageBody, BlockType.TableBody]:
                         if len(sub_block['virtual_lines']) > 0 and sub_block['virtual_lines'][0].get('index', None) is not None:
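For Title and InterlineEquation blocks the drawing code now prefers indexed virtual_lines and only falls back to the real lines when no virtual_lines key is present. A hypothetical helper (not in the package) that mirrors that selection logic:

def sortable_lines(block):
    # Mirrors the branch added above: indexed virtual_lines win, otherwise fall
    # back to block['lines']; unindexed virtual_lines contribute nothing.
    if 'virtual_lines' in block:
        virtual = block['virtual_lines']
        if len(virtual) > 0 and virtual[0].get('index', None) is not None:
            return virtual
        return []
    return block['lines']

block = {'virtual_lines': [{'index': 3, 'bbox': [0, 0, 100, 20]}], 'lines': []}
print(sortable_lines(block))  # the virtual lines are used, since they carry an index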
@@ -12,12 +12,20 @@ if not os.getenv("FTLANG_CACHE"):
 from fast_langdetect import detect_language
 
 
+def remove_invalid_surrogates(text):
+    # 移除无效的 UTF-16 代理对
+    return ''.join(c for c in text if not (0xD800 <= ord(c) <= 0xDFFF))
+
+
 def detect_lang(text: str) -> str:
 
     if len(text) == 0:
         return ""
 
     text = text.replace("\n", "")
+    text = remove_invalid_surrogates(text)
+
+    # print(text)
     try:
         lang_upper = detect_language(text)
     except:
@@ -37,3 +45,4 @@ if __name__ == '__main__':
     print(detect_lang("<html>This is a test</html>"))
     print(detect_lang("这个是中文测试。"))
     print(detect_lang("<html>这个是中文测试。</html>"))
+    print(detect_lang("〖\ud835\udc46\ud835〗这是个包含utf-16的中文测试"))
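The new remove_invalid_surrogates helper simply drops every code point in the UTF-16 surrogate range (U+D800 to U+DFFF) before language detection; without it, strings containing lone surrogates, like the new test case above, cannot even be encoded to UTF-8. A standalone sketch:

def remove_invalid_surrogates(text):
    # Drop lone UTF-16 surrogate code points (U+D800..U+DFFF), as in the hunk above.
    return ''.join(c for c in text if not (0xD800 <= ord(c) <= 0xDFFF))

s = "〖\ud835\udc46\ud835〗这是个包含utf-16的中文测试"
# s.encode('utf-8') would raise UnicodeEncodeError because of the lone surrogates;
# after cleaning, encoding (and language detection) can proceed.
print(remove_invalid_surrogates(s).encode('utf-8').decode('utf-8'))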
magic_pdf/libs/version.py CHANGED
@@ -1 +1 @@
-__version__ = "1.0.1"
+__version__ = "1.1.0"
@@ -7,19 +7,19 @@ from loguru import logger
 from PIL import Image
 
 from magic_pdf.config.constants import MODEL_NAME
-from magic_pdf.config.exceptions import CUDA_NOT_AVAILABLE
-from magic_pdf.data.dataset import Dataset
-from magic_pdf.libs.clean_memory import clean_memory
-from magic_pdf.libs.config_reader import get_device
-from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
+# from magic_pdf.config.exceptions import CUDA_NOT_AVAILABLE
+# from magic_pdf.data.dataset import Dataset
+# from magic_pdf.libs.clean_memory import clean_memory
+# from magic_pdf.libs.config_reader import get_device
+# from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
 from magic_pdf.model.pdf_extract_kit import CustomPEKModel
 from magic_pdf.model.sub_modules.model_utils import (
     clean_vram, crop_img, get_res_list_from_layout_res)
 from magic_pdf.model.sub_modules.ocr.paddleocr.ocr_utils import (
     get_adjusted_mfdetrec_res, get_ocr_result_list)
-from magic_pdf.operators.models import InferenceResult
+# from magic_pdf.operators.models import InferenceResult
 
-YOLO_LAYOUT_BASE_BATCH_SIZE = 4
+YOLO_LAYOUT_BASE_BATCH_SIZE = 1
 MFD_BASE_BATCH_SIZE = 1
 MFR_BASE_BATCH_SIZE = 16
 
@@ -44,19 +44,20 @@ class BatchAnalyze:
         modified_images = []
         for image_index, image in enumerate(images):
             pil_img = Image.fromarray(image)
-            width, height = pil_img.size
-            if height > width:
-                input_res = {'poly': [0, 0, width, 0, width, height, 0, height]}
-                new_image, useful_list = crop_img(
-                    input_res, pil_img, crop_paste_x=width // 2, crop_paste_y=0
-                )
-                layout_images.append(new_image)
-                modified_images.append([image_index, useful_list])
-            else:
-                layout_images.append(pil_img)
+            # width, height = pil_img.size
+            # if height > width:
+            #     input_res = {'poly': [0, 0, width, 0, width, height, 0, height]}
+            #     new_image, useful_list = crop_img(
+            #         input_res, pil_img, crop_paste_x=width // 2, crop_paste_y=0
+            #     )
+            #     layout_images.append(new_image)
+            #     modified_images.append([image_index, useful_list])
+            # else:
+            layout_images.append(pil_img)
 
         images_layout_res += self.model.layout_model.batch_predict(
-            layout_images, self.batch_ratio * YOLO_LAYOUT_BASE_BATCH_SIZE
+            # layout_images, self.batch_ratio * YOLO_LAYOUT_BASE_BATCH_SIZE
+            layout_images, YOLO_LAYOUT_BASE_BATCH_SIZE
         )
 
         for image_index, useful_list in modified_images:
@@ -78,7 +79,8 @@ class BatchAnalyze:
         # 公式检测
         mfd_start_time = time.time()
         images_mfd_res = self.model.mfd_model.batch_predict(
-            images, self.batch_ratio * MFD_BASE_BATCH_SIZE
+            # images, self.batch_ratio * MFD_BASE_BATCH_SIZE
+            images, MFD_BASE_BATCH_SIZE
         )
         logger.info(
             f'mfd time: {round(time.time() - mfd_start_time, 2)}, image num: {len(images)}'
@@ -91,10 +93,12 @@ class BatchAnalyze:
             images,
             batch_size=self.batch_ratio * MFR_BASE_BATCH_SIZE,
         )
+        mfr_count = 0
         for image_index in range(len(images)):
             images_layout_res[image_index] += images_formula_list[image_index]
+            mfr_count += len(images_formula_list[image_index])
         logger.info(
-            f'mfr time: {round(time.time() - mfr_start_time, 2)}, image num: {len(images)}'
+            f'mfr time: {round(time.time() - mfr_start_time, 2)}, image num: {mfr_count}'
         )
 
         # 清理显存
@@ -159,7 +163,7 @@ class BatchAnalyze:
                    elif self.model.table_model_name == MODEL_NAME.TABLE_MASTER:
                        html_code = self.model.table_model.img2html(new_image)
                    elif self.model.table_model_name == MODEL_NAME.RAPID_TABLE:
-                        html_code, table_cell_bboxes, elapse = (
+                        html_code, table_cell_bboxes, logic_points, elapse = (
                            self.model.table_model.predict(new_image)
                        )
                        run_time = time.time() - single_table_start_time
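The RapidTable wrapper's predict now returns four values (the added logic_points) instead of three, and both call sites in this diff unpack accordingly. A hypothetical compatibility shim, useful only if code has to work against both the 1.0.x and 1.1.0 wrappers:

def unpack_table_prediction(result):
    # Accept the old 3-tuple (html, cell_bboxes, elapse) or the new 4-tuple
    # that also carries logic_points; normalise to the 4-value form.
    if len(result) == 4:
        html_code, table_cell_bboxes, logic_points, elapse = result
    else:
        html_code, table_cell_bboxes, elapse = result
        logic_points = None
    return html_code, table_cell_bboxes, logic_points, elapse

print(unpack_table_prediction(('<table></table>', [], [], 0.12)))  # 1.1.0 shape
print(unpack_table_prediction(('<table></table>', [], 0.12)))      # 1.0.x shape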
@@ -195,81 +199,81 @@ class BatchAnalyze:
         return images_layout_res
 
 
-def doc_batch_analyze(
-    dataset: Dataset,
-    ocr: bool = False,
-    show_log: bool = False,
-    start_page_id=0,
-    end_page_id=None,
-    lang=None,
-    layout_model=None,
-    formula_enable=None,
-    table_enable=None,
-    batch_ratio: int | None = None,
-) -> InferenceResult:
-    """Perform batch analysis on a document dataset.
-
-    Args:
-        dataset (Dataset): The dataset containing document pages to be analyzed.
-        ocr (bool, optional): Flag to enable OCR (Optical Character Recognition). Defaults to False.
-        show_log (bool, optional): Flag to enable logging. Defaults to False.
-        start_page_id (int, optional): The starting page ID for analysis. Defaults to 0.
-        end_page_id (int, optional): The ending page ID for analysis. Defaults to None, which means analyze till the last page.
-        lang (str, optional): Language for OCR. Defaults to None.
-        layout_model (optional): Layout model to be used for analysis. Defaults to None.
-        formula_enable (optional): Flag to enable formula detection. Defaults to None.
-        table_enable (optional): Flag to enable table detection. Defaults to None.
-        batch_ratio (int | None, optional): Ratio for batch processing. Defaults to None, which sets it to 1.
-
-    Raises:
-        CUDA_NOT_AVAILABLE: If CUDA is not available, raises an exception as batch analysis is not supported in CPU mode.
-
-    Returns:
-        InferenceResult: The result of the batch analysis containing the analyzed data and the dataset.
-    """
-
-    if not torch.cuda.is_available():
-        raise CUDA_NOT_AVAILABLE('batch analyze not support in CPU mode')
-
-    lang = None if lang == '' else lang
-    # TODO: auto detect batch size
-    batch_ratio = 1 if batch_ratio is None else batch_ratio
-    end_page_id = end_page_id if end_page_id else len(dataset)
-
-    model_manager = ModelSingleton()
-    custom_model: CustomPEKModel = model_manager.get_model(
-        ocr, show_log, lang, layout_model, formula_enable, table_enable
-    )
-    batch_model = BatchAnalyze(model=custom_model, batch_ratio=batch_ratio)
-
-    model_json = []
-
-    # batch analyze
-    images = []
-    for index in range(len(dataset)):
-        if start_page_id <= index <= end_page_id:
-            page_data = dataset.get_page(index)
-            img_dict = page_data.get_image()
-            images.append(img_dict['img'])
-    analyze_result = batch_model(images)
-
-    for index in range(len(dataset)):
-        page_data = dataset.get_page(index)
-        img_dict = page_data.get_image()
-        page_width = img_dict['width']
-        page_height = img_dict['height']
-        if start_page_id <= index <= end_page_id:
-            result = analyze_result.pop(0)
-        else:
-            result = []
-
-        page_info = {'page_no': index, 'height': page_height, 'width': page_width}
-        page_dict = {'layout_dets': result, 'page_info': page_info}
-        model_json.append(page_dict)
-
-    # TODO: clean memory when gpu memory is not enough
-    clean_memory_start_time = time.time()
-    clean_memory(get_device())
-    logger.info(f'clean memory time: {round(time.time() - clean_memory_start_time, 2)}')
-
-    return InferenceResult(model_json, dataset)
+# def doc_batch_analyze(
+#     dataset: Dataset,
+#     ocr: bool = False,
+#     show_log: bool = False,
+#     start_page_id=0,
+#     end_page_id=None,
+#     lang=None,
+#     layout_model=None,
+#     formula_enable=None,
+#     table_enable=None,
+#     batch_ratio: int | None = None,
+# ) -> InferenceResult:
+#     """Perform batch analysis on a document dataset.
+#
+#     Args:
+#         dataset (Dataset): The dataset containing document pages to be analyzed.
+#         ocr (bool, optional): Flag to enable OCR (Optical Character Recognition). Defaults to False.
+#         show_log (bool, optional): Flag to enable logging. Defaults to False.
+#         start_page_id (int, optional): The starting page ID for analysis. Defaults to 0.
+#         end_page_id (int, optional): The ending page ID for analysis. Defaults to None, which means analyze till the last page.
+#         lang (str, optional): Language for OCR. Defaults to None.
+#         layout_model (optional): Layout model to be used for analysis. Defaults to None.
+#         formula_enable (optional): Flag to enable formula detection. Defaults to None.
+#         table_enable (optional): Flag to enable table detection. Defaults to None.
+#         batch_ratio (int | None, optional): Ratio for batch processing. Defaults to None, which sets it to 1.
+#
+#     Raises:
+#         CUDA_NOT_AVAILABLE: If CUDA is not available, raises an exception as batch analysis is not supported in CPU mode.
+#
+#     Returns:
+#         InferenceResult: The result of the batch analysis containing the analyzed data and the dataset.
+#     """
+#
+#     if not torch.cuda.is_available():
+#         raise CUDA_NOT_AVAILABLE('batch analyze not support in CPU mode')
+#
+#     lang = None if lang == '' else lang
+#     # TODO: auto detect batch size
+#     batch_ratio = 1 if batch_ratio is None else batch_ratio
+#     end_page_id = end_page_id if end_page_id else len(dataset)
+#
+#     model_manager = ModelSingleton()
+#     custom_model: CustomPEKModel = model_manager.get_model(
+#         ocr, show_log, lang, layout_model, formula_enable, table_enable
+#     )
+#     batch_model = BatchAnalyze(model=custom_model, batch_ratio=batch_ratio)
+#
+#     model_json = []
+#
+#     # batch analyze
+#     images = []
+#     for index in range(len(dataset)):
+#         if start_page_id <= index <= end_page_id:
+#             page_data = dataset.get_page(index)
+#             img_dict = page_data.get_image()
+#             images.append(img_dict['img'])
+#     analyze_result = batch_model(images)
+#
+#     for index in range(len(dataset)):
+#         page_data = dataset.get_page(index)
+#         img_dict = page_data.get_image()
+#         page_width = img_dict['width']
+#         page_height = img_dict['height']
+#         if start_page_id <= index <= end_page_id:
+#             result = analyze_result.pop(0)
+#         else:
+#             result = []
+#
+#         page_info = {'page_no': index, 'height': page_height, 'width': page_width}
+#         page_dict = {'layout_dets': result, 'page_info': page_info}
+#         model_json.append(page_dict)
+#
+#     # TODO: clean memory when gpu memory is not enough
+#     clean_memory_start_time = time.time()
+#     clean_memory(get_device())
+#     logger.info(f'clean memory time: {round(time.time() - clean_memory_start_time, 2)}')
+#
+#     return InferenceResult(model_json, dataset)
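With doc_batch_analyze commented out, batching is no longer a separate entry point; doc_analyze (see the hunk further below) decides internally whether to run the BatchAnalyze path. A usage sketch under the assumption that the rest of the 1.0.x public API is unchanged; the file path is only an example:

# Assumes the PymuDocDataset / doc_analyze API known from 1.0.x.
from magic_pdf.data.dataset import PymuDocDataset
from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze

with open('demo.pdf', 'rb') as f:
    dataset = PymuDocDataset(f.read())

# No explicit batch_ratio any more: doc_analyze picks one from detected GPU/NPU
# memory (or VIRTUAL_VRAM_SIZE) and otherwise falls back to page-by-page analysis.
infer_result = doc_analyze(dataset, ocr=True)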
@@ -3,8 +3,12 @@ import time
 
 # 关闭paddle的信号处理
 import paddle
+import torch
 from loguru import logger
 
+from magic_pdf.model.batch_analyze import BatchAnalyze
+from magic_pdf.model.sub_modules.model_utils import get_vram
+
 paddle.disable_signal_handler()
 
 os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1'  # 禁止albumentations检查更新
@@ -154,33 +158,88 @@ def doc_analyze(
     table_enable=None,
 ) -> InferenceResult:
 
+    end_page_id = end_page_id if end_page_id else len(dataset) - 1
+
     model_manager = ModelSingleton()
     custom_model = model_manager.get_model(
         ocr, show_log, lang, layout_model, formula_enable, table_enable
     )
 
+    batch_analyze = False
+    device = get_device()
+
+    npu_support = False
+    if str(device).startswith("npu"):
+        import torch_npu
+        if torch_npu.npu.is_available():
+            npu_support = True
+
+    if torch.cuda.is_available() and device != 'cpu' or npu_support:
+        gpu_memory = int(os.getenv("VIRTUAL_VRAM_SIZE", round(get_vram(device))))
+        if gpu_memory is not None and gpu_memory >= 8:
+
+            if 8 <= gpu_memory < 10:
+                batch_ratio = 2
+            elif 10 <= gpu_memory <= 12:
+                batch_ratio = 4
+            elif 12 < gpu_memory <= 16:
+                batch_ratio = 8
+            elif 16 < gpu_memory <= 24:
+                batch_ratio = 16
+            else:
+                batch_ratio = 32
+
+            if batch_ratio >= 1:
+                logger.info(f'gpu_memory: {gpu_memory} GB, batch_ratio: {batch_ratio}')
+                batch_model = BatchAnalyze(model=custom_model, batch_ratio=batch_ratio)
+                batch_analyze = True
+
     model_json = []
     doc_analyze_start = time.time()
 
-    if end_page_id is None:
-        end_page_id = len(dataset)
-
-    for index in range(len(dataset)):
-        page_data = dataset.get_page(index)
-        img_dict = page_data.get_image()
-        img = img_dict['img']
-        page_width = img_dict['width']
-        page_height = img_dict['height']
-        if start_page_id <= index <= end_page_id:
-            page_start = time.time()
-            result = custom_model(img)
-            logger.info(f'-----page_id : {index}, page total time: {round(time.time() - page_start, 2)}-----')
-        else:
-            result = []
+    if batch_analyze:
+        # batch analyze
+        images = []
+        for index in range(len(dataset)):
+            if start_page_id <= index <= end_page_id:
+                page_data = dataset.get_page(index)
+                img_dict = page_data.get_image()
+                images.append(img_dict['img'])
+        analyze_result = batch_model(images)
+
+        for index in range(len(dataset)):
+            page_data = dataset.get_page(index)
+            img_dict = page_data.get_image()
+            page_width = img_dict['width']
+            page_height = img_dict['height']
+            if start_page_id <= index <= end_page_id:
+                result = analyze_result.pop(0)
+            else:
+                result = []
+
+            page_info = {'page_no': index, 'height': page_height, 'width': page_width}
+            page_dict = {'layout_dets': result, 'page_info': page_info}
+            model_json.append(page_dict)
 
-        page_info = {'page_no': index, 'height': page_height, 'width': page_width}
-        page_dict = {'layout_dets': result, 'page_info': page_info}
-        model_json.append(page_dict)
+    else:
+        # single analyze
+
+        for index in range(len(dataset)):
+            page_data = dataset.get_page(index)
+            img_dict = page_data.get_image()
+            img = img_dict['img']
+            page_width = img_dict['width']
+            page_height = img_dict['height']
+            if start_page_id <= index <= end_page_id:
+                page_start = time.time()
+                result = custom_model(img)
+                logger.info(f'-----page_id : {index}, page total time: {round(time.time() - page_start, 2)}-----')
+            else:
+                result = []
+
+            page_info = {'page_no': index, 'height': page_height, 'width': page_width}
+            page_dict = {'layout_dets': result, 'page_info': page_info}
+            model_json.append(page_dict)
 
     gc_start = time.time()
     clean_memory(get_device())
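In short, doc_analyze now switches to batched inference whenever a CUDA or NPU device with at least 8 GB of memory is available (the VIRTUAL_VRAM_SIZE environment variable overrides the detected amount) and scales batch_ratio with VRAM. A standalone sketch of that mapping, with thresholds copied from the hunk and a function name of our own:

def pick_batch_ratio(gpu_memory_gb: int):
    # Thresholds as in the 1.1.0 doc_analyze hunk above.
    if gpu_memory_gb < 8:
        return None  # stays in single-page mode
    if 8 <= gpu_memory_gb < 10:
        return 2
    elif 10 <= gpu_memory_gb <= 12:
        return 4
    elif 12 < gpu_memory_gb <= 16:
        return 8
    elif 16 < gpu_memory_gb <= 24:
        return 16
    else:
        return 32

for vram in (6, 8, 11, 16, 24, 48):
    print(vram, '->', pick_batch_ratio(vram))  # None, 2, 4, 8, 16, 32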
@@ -69,6 +69,7 @@ class CustomPEKModel:
         self.apply_table = self.table_config.get('enable', False)
         self.table_max_time = self.table_config.get('max_time', TABLE_MAX_TIME_VALUE)
         self.table_model_name = self.table_config.get('model', MODEL_NAME.RAPID_TABLE)
+        self.table_sub_model_name = self.table_config.get('sub_model', None)
 
         # ocr config
         self.apply_ocr = ocr
@@ -144,7 +145,7 @@ class CustomPEKModel:
                        model_config_dir, 'layoutlmv3', 'layoutlmv3_base_inference.yaml'
                    )
                ),
-                device=self.device,
+                device='cpu' if str(self.device).startswith("mps") else self.device,
            )
        elif self.layout_model_name == MODEL_NAME.DocLayout_YOLO:
            self.layout_model = atom_model_manager.get_atom_model(
@@ -174,6 +175,7 @@ class CustomPEKModel:
                table_max_time=self.table_max_time,
                device=self.device,
                ocr_engine=self.ocr_model,
+                table_sub_model_name=self.table_sub_model_name
            )
 
        logger.info('DocAnalysis init done!')
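Together with the table_model_init changes at the end of this diff, this threads an optional 'sub_model' key of the table config through CustomPEKModel into RapidTableModel. A hedged sketch of the relevant config slice (the key names come from the getters above; the concrete values are only examples):

# Hypothetical table section of the model config; only the keys are taken from the diff.
table_config = {
    'enable': True,              # -> self.apply_table
    'max_time': 400,             # -> self.table_max_time (example value)
    'model': 'rapid_table',      # -> self.table_model_name (example value)
    'sub_model': 'slanet_plus',  # new in 1.1.0 -> self.table_sub_model_name (example value)
}
print(table_config.get('sub_model', None))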
@@ -192,24 +194,24 @@ class CustomPEKModel:
            layout_res = self.layout_model(image, ignore_catids=[])
        elif self.layout_model_name == MODEL_NAME.DocLayout_YOLO:
            # doclayout_yolo
-            if height > width:
-                input_res = {"poly":[0,0,width,0,width,height,0,height]}
-                new_image, useful_list = crop_img(input_res, pil_img, crop_paste_x=width//2, crop_paste_y=0)
-                paste_x, paste_y, xmin, ymin, xmax, ymax, new_width, new_height = useful_list
-                layout_res = self.layout_model.predict(new_image)
-                for res in layout_res:
-                    p1, p2, p3, p4, p5, p6, p7, p8 = res['poly']
-                    p1 = p1 - paste_x + xmin
-                    p2 = p2 - paste_y + ymin
-                    p3 = p3 - paste_x + xmin
-                    p4 = p4 - paste_y + ymin
-                    p5 = p5 - paste_x + xmin
-                    p6 = p6 - paste_y + ymin
-                    p7 = p7 - paste_x + xmin
-                    p8 = p8 - paste_y + ymin
-                    res['poly'] = [p1, p2, p3, p4, p5, p6, p7, p8]
-            else:
-                layout_res = self.layout_model.predict(image)
+            # if height > width:
+            #     input_res = {"poly":[0,0,width,0,width,height,0,height]}
+            #     new_image, useful_list = crop_img(input_res, pil_img, crop_paste_x=width//2, crop_paste_y=0)
+            #     paste_x, paste_y, xmin, ymin, xmax, ymax, new_width, new_height = useful_list
+            #     layout_res = self.layout_model.predict(new_image)
+            #     for res in layout_res:
+            #         p1, p2, p3, p4, p5, p6, p7, p8 = res['poly']
+            #         p1 = p1 - paste_x + xmin
+            #         p2 = p2 - paste_y + ymin
+            #         p3 = p3 - paste_x + xmin
+            #         p4 = p4 - paste_y + ymin
+            #         p5 = p5 - paste_x + xmin
+            #         p6 = p6 - paste_y + ymin
+            #         p7 = p7 - paste_x + xmin
+            #         p8 = p8 - paste_y + ymin
+            #         res['poly'] = [p1, p2, p3, p4, p5, p6, p7, p8]
+            # else:
+            layout_res = self.layout_model.predict(image)
 
        layout_cost = round(time.time() - layout_start, 2)
        logger.info(f'layout detection time: {layout_cost}')
@@ -228,7 +230,7 @@ class CustomPEKModel:
            logger.info(f'formula nums: {len(formula_list)}, mfr time: {mfr_cost}')
 
        # 清理显存
-        clean_vram(self.device, vram_threshold=8)
+        clean_vram(self.device, vram_threshold=6)
 
        # 从layout_res中获取ocr区域、表格区域、公式区域
        ocr_res_list, table_res_list, single_page_mfdetrec_res = (
@@ -276,7 +278,7 @@ class CustomPEKModel:
                elif self.table_model_name == MODEL_NAME.TABLE_MASTER:
                    html_code = self.table_model.img2html(new_image)
                elif self.table_model_name == MODEL_NAME.RAPID_TABLE:
-                    html_code, table_cell_bboxes, elapse = self.table_model.predict(
+                    html_code, table_cell_bboxes, logic_points, elapse = self.table_model.predict(
                        new_image
                    )
                run_time = time.time() - single_table_start_time
@@ -9,7 +9,11 @@ class DocLayoutYOLOModel(object):
     def predict(self, image):
         layout_res = []
         doclayout_yolo_res = self.model.predict(
-            image, imgsz=1024, conf=0.25, iou=0.45, verbose=False, device=self.device
+            image,
+            imgsz=1280,
+            conf=0.10,
+            iou=0.45,
+            verbose=False, device=self.device
         )[0]
         for xyxy, conf, cla in zip(
             doclayout_yolo_res.boxes.xyxy.cpu(),
@@ -32,8 +36,8 @@ class DocLayoutYOLOModel(object):
                image_res.cpu()
                for image_res in self.model.predict(
                    images[index : index + batch_size],
-                    imgsz=1024,
-                    conf=0.25,
+                    imgsz=1280,
+                    conf=0.10,
                    iou=0.45,
                    verbose=False,
                    device=self.device,
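Both DocLayout-YOLO paths now run at imgsz=1280 with a much lower confidence cut-off (0.10 instead of 0.25), so more low-scoring candidate boxes are produced and any stricter filtering has to happen downstream. A generic, hypothetical post-filter sketch (the detection dict layout here is ours, not necessarily magic-pdf's):

def filter_detections(detections, min_score=0.25):
    # Keep only detections at or above a stricter score than the model's conf=0.10.
    return [det for det in detections if det['score'] >= min_score]

dets = [
    {'category_id': 1, 'score': 0.92, 'bbox': [10, 10, 200, 60]},
    {'category_id': 5, 'score': 0.12, 'bbox': [15, 400, 180, 420]},  # surfaced by conf=0.10
]
print(filter_detections(dets))  # only the 0.92 box survives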
@@ -89,7 +89,7 @@ class UnimernetModel(object):
            mf_image_list.append(bbox_img)
 
        dataset = MathDataset(mf_image_list, transform=self.mfr_transform)
-        dataloader = DataLoader(dataset, batch_size=64, num_workers=0)
+        dataloader = DataLoader(dataset, batch_size=32, num_workers=0)
        mfr_res = []
        for mf_img in dataloader:
            mf_img = mf_img.to(self.device)
@@ -21,7 +21,7 @@ from magic_pdf.model.sub_modules.table.tablemaster.tablemaster_paddle import \
     TableMasterPaddleModel
 
 
-def table_model_init(table_model_type, model_path, max_time, _device_='cpu', ocr_engine=None):
+def table_model_init(table_model_type, model_path, max_time, _device_='cpu', ocr_engine=None, table_sub_model_name=None):
     if table_model_type == MODEL_NAME.STRUCT_EQTABLE:
         table_model = StructTableModel(model_path, max_new_tokens=2048, max_time=max_time)
     elif table_model_type == MODEL_NAME.TABLE_MASTER:
@@ -31,7 +31,7 @@ def table_model_init(table_model_type, model_path, max_time, _device_='cpu', ocr
         }
         table_model = TableMasterPaddleModel(config)
     elif table_model_type == MODEL_NAME.RAPID_TABLE:
-        table_model = RapidTableModel(ocr_engine)
+        table_model = RapidTableModel(ocr_engine, table_sub_model_name)
     else:
         logger.error('table model type not allow')
         exit(1)
@@ -163,7 +163,8 @@ def atom_model_init(model_name: str, **kwargs):
            kwargs.get('table_model_path'),
            kwargs.get('table_max_time'),
            kwargs.get('device'),
-            kwargs.get('ocr_engine')
+            kwargs.get('ocr_engine'),
+            kwargs.get('table_sub_model_name')
        )
    elif model_name == AtomicModel.LangDetect:
        if kwargs.get('langdetect_model_name') == MODEL_NAME.YOLO_V11_LangDetect: