PyPI - magic-pdf - Versions diffs - 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl - Mend

magic-pdf 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (102) hide show

magic_pdf/model/sub_modules/model_init.py CHANGED Viewed

@@ -5,47 +5,57 @@ from magic_pdf.config.constants import MODEL_NAME
 from magic_pdf.model.model_list import AtomicModel
 from magic_pdf.model.sub_modules.language_detection.yolov11.YOLOv11 import YOLOv11LangDetModel
 from magic_pdf.model.sub_modules.layout.doclayout_yolo.DocLayoutYOLO import DocLayoutYOLOModel
-from magic_pdf.model.sub_modules.layout.layoutlmv3.model_init import Layoutlmv3_Predictor
 from magic_pdf.model.sub_modules.mfd.yolov8.YOLOv8 import YOLOv8MFDModel
 from magic_pdf.model.sub_modules.mfr.unimernet.Unimernet import UnimernetModel
-try:
-    from magic_pdf_ascend_plugin.libs.license_verifier import load_license, LicenseFormatError, LicenseSignatureError, LicenseExpiredError
-    from magic_pdf_ascend_plugin.model_plugin.ocr.paddleocr.ppocr_273_npu import ModifiedPaddleOCR
-    from magic_pdf_ascend_plugin.model_plugin.table.rapidtable.rapid_table_npu import RapidTableModel
-    license_key = load_license()
-    logger.info(f'Using Ascend Plugin Success, License id is {license_key["payload"]["id"]},'
-                f' License expired at {license_key["payload"]["date"]["end_date"]}')
-except Exception as e:
-    if isinstance(e, ImportError):
-        pass
-    elif isinstance(e, LicenseFormatError):
-        logger.error("Ascend Plugin: Invalid license format. Please check the license file.")
-    elif isinstance(e, LicenseSignatureError):
-        logger.error("Ascend Plugin: Invalid signature. The license may be tampered with.")
-    elif isinstance(e, LicenseExpiredError):
-        logger.error("Ascend Plugin: License has expired. Please renew your license.")
-    elif isinstance(e, FileNotFoundError):
-        logger.error("Ascend Plugin: Not found License file.")
-    else:
-        logger.error(f"Ascend Plugin: {e}")
-    from magic_pdf.model.sub_modules.ocr.paddleocr.ppocr_273_mod import ModifiedPaddleOCR
-    # from magic_pdf.model.sub_modules.ocr.paddleocr.ppocr_291_mod import ModifiedPaddleOCR
-    from magic_pdf.model.sub_modules.table.rapidtable.rapid_table import RapidTableModel
-from magic_pdf.model.sub_modules.table.structeqtable.struct_eqtable import StructTableModel
-from magic_pdf.model.sub_modules.table.tablemaster.tablemaster_paddle import TableMasterPaddleModel
-def table_model_init(table_model_type, model_path, max_time, _device_='cpu', ocr_engine=None, table_sub_model_name=None):
+from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.pytorch_paddle import PytorchPaddleOCR
+from magic_pdf.model.sub_modules.table.rapidtable.rapid_table import RapidTableModel
+# try:
+#     from magic_pdf_ascend_plugin.libs.license_verifier import (
+#         LicenseExpiredError, LicenseFormatError, LicenseSignatureError,
+#         load_license)
+#     from magic_pdf_ascend_plugin.model_plugin.ocr.paddleocr.ppocr_273_npu import ModifiedPaddleOCR
+#     from magic_pdf_ascend_plugin.model_plugin.table.rapidtable.rapid_table_npu import RapidTableModel
+#     license_key = load_license()
+#     logger.info(f'Using Ascend Plugin Success, License id is {license_key["payload"]["id"]},'
+#                 f' License expired at {license_key["payload"]["date"]["end_date"]}')
+# except Exception as e:
+#     if isinstance(e, ImportError):
+#         pass
+#     elif isinstance(e, LicenseFormatError):
+#         logger.error('Ascend Plugin: Invalid license format. Please check the license file.')
+#     elif isinstance(e, LicenseSignatureError):
+#         logger.error('Ascend Plugin: Invalid signature. The license may be tampered with.')
+#     elif isinstance(e, LicenseExpiredError):
+#         logger.error('Ascend Plugin: License has expired. Please renew your license.')
+#     elif isinstance(e, FileNotFoundError):
+#         logger.error('Ascend Plugin: Not found License file.')
+#     else:
+#         logger.error(f'Ascend Plugin: {e}')
+#     from magic_pdf.model.sub_modules.ocr.paddleocr.ppocr_273_mod import ModifiedPaddleOCR
+#     # from magic_pdf.model.sub_modules.ocr.paddleocr.ppocr_291_mod import ModifiedPaddleOCR
+#     from magic_pdf.model.sub_modules.table.rapidtable.rapid_table import RapidTableModel
+def table_model_init(table_model_type, model_path, max_time, _device_='cpu', lang=None, table_sub_model_name=None):
     if table_model_type == MODEL_NAME.STRUCT_EQTABLE:
+        from magic_pdf.model.sub_modules.table.structeqtable.struct_eqtable import StructTableModel
         table_model = StructTableModel(model_path, max_new_tokens=2048, max_time=max_time)
     elif table_model_type == MODEL_NAME.TABLE_MASTER:
+        from magic_pdf.model.sub_modules.table.tablemaster.tablemaster_paddle import TableMasterPaddleModel
         config = {
             'model_dir': model_path,
             'device': _device_
         }
         table_model = TableMasterPaddleModel(config)
     elif table_model_type == MODEL_NAME.RAPID_TABLE:
+        atom_model_manager = AtomModelSingleton()
+        ocr_engine = atom_model_manager.get_atom_model(
+            atom_model_name='ocr',
+            ocr_show_log=False,
+            det_db_box_thresh=0.5,
+            det_db_unclip_ratio=1.6,
+            lang=lang
+        )
         table_model = RapidTableModel(ocr_engine, table_sub_model_name)
     else:
         logger.error('table model type not allow')
@@ -55,7 +65,7 @@ def table_model_init(table_model_type, model_path, max_time, _device_='cpu', ocr
 def mfd_model_init(weight, device='cpu'):
-    if str(device).startswith("npu"):
+    if str(device).startswith('npu'):
         device = torch.device(device)
     mfd_model = YOLOv8MFDModel(weight, device)
     return mfd_model
@@ -67,19 +77,20 @@ def mfr_model_init(weight_dir, cfg_path, device='cpu'):
 def layout_model_init(weight, config_file, device):
+    from magic_pdf.model.sub_modules.layout.layoutlmv3.model_init import Layoutlmv3_Predictor
     model = Layoutlmv3_Predictor(weight, config_file, device)
     return model
 def doclayout_yolo_model_init(weight, device='cpu'):
-    if str(device).startswith("npu"):
+    if str(device).startswith('npu'):
         device = torch.device(device)
     model = DocLayoutYOLOModel(weight, device)
     return model
 def langdetect_model_init(langdetect_model_weight, device='cpu'):
-    if str(device).startswith("npu"):
+    if str(device).startswith('npu'):
         device = torch.device(device)
     model = YOLOv11LangDetModel(langdetect_model_weight, device)
     return model
@@ -92,7 +103,8 @@ def ocr_model_init(show_log: bool = False,
                    det_db_unclip_ratio=1.8,
                    ):
     if lang is not None and lang != '':
-        model = ModifiedPaddleOCR(
+        # model = ModifiedPaddleOCR(
+        model = PytorchPaddleOCR(
             show_log=show_log,
             det_db_box_thresh=det_db_box_thresh,
             lang=lang,
@@ -100,7 +112,8 @@ def ocr_model_init(show_log: bool = False,
             det_db_unclip_ratio=det_db_unclip_ratio,
         )
     else:
-        model = ModifiedPaddleOCR(
+        # model = ModifiedPaddleOCR(
+        model = PytorchPaddleOCR(
             show_log=show_log,
             det_db_box_thresh=det_db_box_thresh,
             use_dilation=use_dilation,
@@ -129,7 +142,7 @@ class AtomModelSingleton:
         elif atom_model_name in [AtomicModel.Layout]:
             key = (atom_model_name, layout_model_name)
         elif atom_model_name in [AtomicModel.Table]:
-            key = (atom_model_name, table_model_name)
+            key = (atom_model_name, table_model_name, lang)
         else:
             key = atom_model_name
@@ -177,7 +190,7 @@ def atom_model_init(model_name: str, **kwargs):
             kwargs.get('table_model_path'),
             kwargs.get('table_max_time'),
             kwargs.get('device'),
-            kwargs.get('ocr_engine'),
+            kwargs.get('lang'),
             kwargs.get('table_sub_model_name')
         )
     elif model_name == AtomicModel.LangDetect:

magic_pdf/model/sub_modules/model_utils.py CHANGED Viewed

@@ -1,25 +1,31 @@
 import time
 import torch
-from PIL import Image
 from loguru import logger
+import numpy as np
 from magic_pdf.libs.clean_memory import clean_memory
-def crop_img(input_res, input_pil_img, crop_paste_x=0, crop_paste_y=0):
+def crop_img(input_res, input_np_img, crop_paste_x=0, crop_paste_y=0):
     crop_xmin, crop_ymin = int(input_res['poly'][0]), int(input_res['poly'][1])
     crop_xmax, crop_ymax = int(input_res['poly'][4]), int(input_res['poly'][5])
-    # Create a white background with an additional width and height of 50
+    # Calculate new dimensions
     crop_new_width = crop_xmax - crop_xmin + crop_paste_x * 2
     crop_new_height = crop_ymax - crop_ymin + crop_paste_y * 2
-    return_image = Image.new('RGB', (crop_new_width, crop_new_height), 'white')
-    # Crop image
-    crop_box = (crop_xmin, crop_ymin, crop_xmax, crop_ymax)
-    cropped_img = input_pil_img.crop(crop_box)
-    return_image.paste(cropped_img, (crop_paste_x, crop_paste_y))
-    return_list = [crop_paste_x, crop_paste_y, crop_xmin, crop_ymin, crop_xmax, crop_ymax, crop_new_width, crop_new_height]
+    # Create a white background array
+    return_image = np.ones((crop_new_height, crop_new_width, 3), dtype=np.uint8) * 255
+    # Crop the original image using numpy slicing
+    cropped_img = input_np_img[crop_ymin:crop_ymax, crop_xmin:crop_xmax]
+    # Paste the cropped image onto the white background
+    return_image[crop_paste_y:crop_paste_y + (crop_ymax - crop_ymin),
+    crop_paste_x:crop_paste_x + (crop_xmax - crop_xmin)] = cropped_img
+    return_list = [crop_paste_x, crop_paste_y, crop_xmin, crop_ymin, crop_xmax, crop_ymax, crop_new_width,
+                   crop_new_height]
     return return_image, return_list

magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ # Copyright (c) Opendatalab. All rights reserved.

magic_pdf/model/sub_modules/ocr/{paddleocr → paddleocr2pytorch}/ocr_utils.py RENAMED Viewed

@@ -1,58 +1,67 @@
+# Copyright (c) Opendatalab. All rights reserved.
+import copy
 import cv2
 import numpy as np
-from loguru import logger
-from io import BytesIO
-from PIL import Image
-import base64
-from magic_pdf.libs.boxbase import __is_overlaps_y_exceeds_threshold
 from magic_pdf.pre_proc.ocr_dict_merge import merge_spans_to_line
-import importlib.resources
-from paddleocr import PaddleOCR
-from ppocr.utils.utility import check_and_read
+from magic_pdf.libs.boxbase import __is_overlaps_y_exceeds_threshold
 def img_decode(content: bytes):
     np_arr = np.frombuffer(content, dtype=np.uint8)
     return cv2.imdecode(np_arr, cv2.IMREAD_UNCHANGED)
 def check_img(img):
     if isinstance(img, bytes):
         img = img_decode(img)
-    if isinstance(img, str):
-        image_file = img
-        img, flag_gif, flag_pdf = check_and_read(image_file)
-        if not flag_gif and not flag_pdf:
-            with open(image_file, 'rb') as f:
-                img_str = f.read()
-                img = img_decode(img_str)
-            if img is None:
-                try:
-                    buf = BytesIO()
-                    image = BytesIO(img_str)
-                    im = Image.open(image)
-                    rgb = im.convert('RGB')
-                    rgb.save(buf, 'jpeg')
-                    buf.seek(0)
-                    image_bytes = buf.read()
-                    data_base64 = str(base64.b64encode(image_bytes),
-                                      encoding="utf-8")
-                    image_decode = base64.b64decode(data_base64)
-                    img_array = np.frombuffer(image_decode, np.uint8)
-                    img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
-                except:
-                    logger.error("error in loading image:{}".format(image_file))
-                    return None
-        if img is None:
-            logger.error("error in loading image:{}".format(image_file))
-            return None
     if isinstance(img, np.ndarray) and len(img.shape) == 2:
         img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+    return img
+def alpha_to_color(img, alpha_color=(255, 255, 255)):
+    if len(img.shape) == 3 and img.shape[2] == 4:
+        B, G, R, A = cv2.split(img)
+        alpha = A / 255
+        R = (alpha_color[0] * (1 - alpha) + R * alpha).astype(np.uint8)
+        G = (alpha_color[1] * (1 - alpha) + G * alpha).astype(np.uint8)
+        B = (alpha_color[2] * (1 - alpha) + B * alpha).astype(np.uint8)
+        img = cv2.merge((B, G, R))
     return img
+def preprocess_image(_image):
+    alpha_color = (255, 255, 255)
+    _image = alpha_to_color(_image, alpha_color)
+    return _image
+def sorted_boxes(dt_boxes):
+    """
+    Sort text boxes in order from top to bottom, left to right
+    args:
+        dt_boxes(array):detected text boxes with shape [4, 2]
+    return:
+        sorted boxes(array) with shape [4, 2]
+    """
+    num_boxes = dt_boxes.shape[0]
+    sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
+    _boxes = list(sorted_boxes)
+    for i in range(num_boxes - 1):
+        for j in range(i, -1, -1):
+            if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and \
+                    (_boxes[j + 1][0][0] < _boxes[j][0][0]):
+                tmp = _boxes[j]
+                _boxes[j] = _boxes[j + 1]
+                _boxes[j + 1] = tmp
+            else:
+                break
+    return _boxes
 def bbox_to_points(bbox):
     """ 将bbox格式转换为四个顶点的数组 """
     x0, y0, x1, y1 = bbox
@@ -252,9 +261,10 @@ def get_adjusted_mfdetrec_res(single_page_mfdetrec_res, useful_list):
     return adjusted_mfdetrec_res
-def get_ocr_result_list(ocr_res, useful_list):
+def get_ocr_result_list(ocr_res, useful_list, ocr_enable, new_image, lang):
     paste_x, paste_y, xmin, ymin, xmax, ymax, new_width, new_height = useful_list
     ocr_result_list = []
+    ori_im = new_image.copy()
     for box_ocr_res in ocr_res:
         if len(box_ocr_res) == 2:
@@ -266,6 +276,11 @@ def get_ocr_result_list(ocr_res, useful_list):
         else:
             p1, p2, p3, p4 = box_ocr_res
             text, score = "", 1
+            if ocr_enable:
+                tmp_box = copy.deepcopy(np.array([p1, p2, p3, p4]).astype('float32'))
+                img_crop = get_rotate_crop_image(ori_im, tmp_box)
         # average_angle_degrees = calculate_angle_degrees(box_ocr_res[0])
         # if average_angle_degrees > 0.5:
         poly = [p1, p2, p3, p4]
@@ -288,12 +303,22 @@ def get_ocr_result_list(ocr_res, useful_list):
         p3 = [p3[0] - paste_x + xmin, p3[1] - paste_y + ymin]
         p4 = [p4[0] - paste_x + xmin, p4[1] - paste_y + ymin]
-        ocr_result_list.append({
-            'category_id': 15,
-            'poly': p1 + p2 + p3 + p4,
-            'score': float(round(score, 2)),
-            'text': text,
-        })
+        if ocr_enable:
+            ocr_result_list.append({
+                'category_id': 15,
+                'poly': p1 + p2 + p3 + p4,
+                'score': 1,
+                'text': text,
+                'np_img': img_crop,
+                'lang': lang,
+            })
+        else:
+            ocr_result_list.append({
+                'category_id': 15,
+                'poly': p1 + p2 + p3 + p4,
+                'score': float(round(score, 2)),
+                'text': text,
+            })
     return ocr_result_list
@@ -308,56 +333,36 @@ def calculate_is_angle(poly):
         return True
-class ONNXModelSingleton:
-    _instance = None
-    _models = {}
-    def __new__(cls, *args, **kwargs):
-        if cls._instance is None:
-            cls._instance = super().__new__(cls)
-        return cls._instance
-    def get_onnx_model(self, **kwargs):
-        lang = kwargs.get('lang', None)
-        det_db_box_thresh = kwargs.get('det_db_box_thresh', 0.3)
-        use_dilation = kwargs.get('use_dilation', True)
-        det_db_unclip_ratio = kwargs.get('det_db_unclip_ratio', 1.8)
-        key = (lang, det_db_box_thresh, use_dilation, det_db_unclip_ratio)
-        if key not in self._models:
-            self._models[key] = onnx_model_init(key)
-        return self._models[key]
-def onnx_model_init(key):
-    if len(key) < 4:
-        logger.error('Invalid key length, expected at least 4 elements')
-        exit(1)
-    try:
-        with importlib.resources.path('rapidocr_onnxruntime.models', '') as resource_path:
-            additional_ocr_params = {
-                "use_onnx": True,
-                "det_model_dir": f'{resource_path}/ch_PP-OCRv4_det_infer.onnx',
-                "rec_model_dir": f'{resource_path}/ch_PP-OCRv4_rec_infer.onnx',
-                "cls_model_dir": f'{resource_path}/ch_ppocr_mobile_v2.0_cls_infer.onnx',
-                "det_db_box_thresh": key[1],
-                "use_dilation": key[2],
-                "det_db_unclip_ratio": key[3],
-            }
-            if key[0] is not None:
-                additional_ocr_params["lang"] = key[0]
-            # logger.info(f"additional_ocr_params: {additional_ocr_params}")
-            onnx_model = PaddleOCR(**additional_ocr_params)
-            if onnx_model is None:
-                logger.error('model init failed')
-                exit(1)
-            else:
-                return onnx_model
-    except Exception as e:
-        logger.exception(f'Error initializing model: {e}')
-        exit(1)
+def get_rotate_crop_image(img, points):
+    '''
+    img_height, img_width = img.shape[0:2]
+    left = int(np.min(points[:, 0]))
+    right = int(np.max(points[:, 0]))
+    top = int(np.min(points[:, 1]))
+    bottom = int(np.max(points[:, 1]))
+    img_crop = img[top:bottom, left:right, :].copy()
+    points[:, 0] = points[:, 0] - left
+    points[:, 1] = points[:, 1] - top
+    '''
+    assert len(points) == 4, "shape of points must be 4*2"
+    img_crop_width = int(
+        max(
+            np.linalg.norm(points[0] - points[1]),
+            np.linalg.norm(points[2] - points[3])))
+    img_crop_height = int(
+        max(
+            np.linalg.norm(points[0] - points[3]),
+            np.linalg.norm(points[1] - points[2])))
+    pts_std = np.float32([[0, 0], [img_crop_width, 0],
+                          [img_crop_width, img_crop_height],
+                          [0, img_crop_height]])
+    M = cv2.getPerspectiveTransform(points, pts_std)
+    dst_img = cv2.warpPerspective(
+        img,
+        M, (img_crop_width, img_crop_height),
+        borderMode=cv2.BORDER_REPLICATE,
+        flags=cv2.INTER_CUBIC)
+    dst_img_height, dst_img_width = dst_img.shape[0:2]
+    if dst_img_height * 1.0 / dst_img_width >= 1.5:
+        dst_img = np.rot90(dst_img)
+    return dst_img

magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py ADDED Viewed

@@ -0,0 +1,193 @@
+# Copyright (c) Opendatalab. All rights reserved.
+import copy
+import os.path
+import warnings
+from pathlib import Path
+import cv2
+import numpy as np
+import yaml
+from loguru import logger
+from magic_pdf.libs.config_reader import get_device, get_local_models_dir
+from .ocr_utils import check_img, preprocess_image, sorted_boxes, merge_det_boxes, update_det_boxes, get_rotate_crop_image
+from .tools.infer.predict_system import TextSystem
+from .tools.infer import pytorchocr_utility as utility
+import argparse
+latin_lang = [
+        'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr',  # noqa: E126
+        'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl',
+        'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv',
+        'sw', 'tl', 'tr', 'uz', 'vi', 'french', 'german'
+]
+arabic_lang = ['ar', 'fa', 'ug', 'ur']
+cyrillic_lang = [
+        'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava',  # noqa: E126
+        'dar', 'inh', 'che', 'lbe', 'lez', 'tab'
+]
+devanagari_lang = [
+        'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom',  # noqa: E126
+        'sa', 'bgc'
+]
+def get_model_params(lang, config):
+    if lang in config['lang']:
+        params = config['lang'][lang]
+        det = params.get('det')
+        rec = params.get('rec')
+        dict_file = params.get('dict')
+        return det, rec, dict_file
+    else:
+        raise Exception (f'Language {lang} not supported')
+root_dir = Path(__file__).resolve().parent
+class PytorchPaddleOCR(TextSystem):
+    def __init__(self, *args, **kwargs):
+        parser = utility.init_args()
+        args = parser.parse_args(args)
+        self.lang = kwargs.get('lang', 'ch')
+        if self.lang in latin_lang:
+            self.lang = 'latin'
+        elif self.lang in arabic_lang:
+            self.lang = 'arabic'
+        elif self.lang in cyrillic_lang:
+            self.lang = 'cyrillic'
+        elif self.lang in devanagari_lang:
+            self.lang = 'devanagari'
+        else:
+            pass
+        models_config_path = os.path.join(root_dir, 'pytorchocr', 'utils', 'resources', 'models_config.yml')
+        with open(models_config_path) as file:
+            config = yaml.safe_load(file)
+            det, rec, dict_file = get_model_params(self.lang, config)
+        ocr_models_dir = os.path.join(get_local_models_dir(), 'OCR', 'paddleocr_torch')
+        kwargs['det_model_path'] = os.path.join(ocr_models_dir, det)
+        kwargs['rec_model_path'] = os.path.join(ocr_models_dir, rec)
+        kwargs['rec_char_dict_path'] = os.path.join(root_dir, 'pytorchocr', 'utils', 'resources', 'dict', dict_file)
+        # kwargs['rec_batch_num'] = 8
+        kwargs['device'] = get_device()
+        default_args = vars(args)
+        default_args.update(kwargs)
+        args = argparse.Namespace(**default_args)
+        super().__init__(args)
+    def ocr(self,
+            img,
+            det=True,
+            rec=True,
+            mfd_res=None,
+            tqdm_enable=False,
+            ):
+        assert isinstance(img, (np.ndarray, list, str, bytes))
+        if isinstance(img, list) and det == True:
+            logger.error('When input a list of images, det must be false')
+            exit(0)
+        img = check_img(img)
+        imgs = [img]
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", category=RuntimeWarning)
+            if det and rec:
+                ocr_res = []
+                for img in imgs:
+                    img = preprocess_image(img)
+                    dt_boxes, rec_res = self.__call__(img, mfd_res=mfd_res)
+                    if not dt_boxes and not rec_res:
+                        ocr_res.append(None)
+                        continue
+                    tmp_res = [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
+                    ocr_res.append(tmp_res)
+                return ocr_res
+            elif det and not rec:
+                ocr_res = []
+                for img in imgs:
+                    img = preprocess_image(img)
+                    dt_boxes, elapse = self.text_detector(img)
+                    # logger.debug("dt_boxes num : {}, elapsed : {}".format(len(dt_boxes), elapse))
+                    if dt_boxes is None:
+                        ocr_res.append(None)
+                        continue
+                    dt_boxes = sorted_boxes(dt_boxes)
+                    # merge_det_boxes 和 update_det_boxes 都会把poly转成bbox再转回poly，因此需要过滤所有倾斜程度较大的文本框
+                    dt_boxes = merge_det_boxes(dt_boxes)
+                    if mfd_res:
+                        dt_boxes = update_det_boxes(dt_boxes, mfd_res)
+                    tmp_res = [box.tolist() for box in dt_boxes]
+                    ocr_res.append(tmp_res)
+                return ocr_res
+            elif not det and rec:
+                ocr_res = []
+                for img in imgs:
+                    if not isinstance(img, list):
+                        img = preprocess_image(img)
+                        img = [img]
+                    rec_res, elapse = self.text_recognizer(img, tqdm_enable=tqdm_enable)
+                    # logger.debug("rec_res num  : {}, elapsed : {}".format(len(rec_res), elapse))
+                    ocr_res.append(rec_res)
+                return ocr_res
+    def __call__(self, img, mfd_res=None):
+        if img is None:
+            logger.debug("no valid image provided")
+            return None, None
+        ori_im = img.copy()
+        dt_boxes, elapse = self.text_detector(img)
+        if dt_boxes is None:
+            logger.debug("no dt_boxes found, elapsed : {}".format(elapse))
+            return None, None
+        else:
+            pass
+            # logger.debug("dt_boxes num : {}, elapsed : {}".format(len(dt_boxes), elapse))
+        img_crop_list = []
+        dt_boxes = sorted_boxes(dt_boxes)
+        # merge_det_boxes 和 update_det_boxes 都会把poly转成bbox再转回poly，因此需要过滤所有倾斜程度较大的文本框
+        dt_boxes = merge_det_boxes(dt_boxes)
+        if mfd_res:
+            dt_boxes = update_det_boxes(dt_boxes, mfd_res)
+        for bno in range(len(dt_boxes)):
+            tmp_box = copy.deepcopy(dt_boxes[bno])
+            img_crop = get_rotate_crop_image(ori_im, tmp_box)
+            img_crop_list.append(img_crop)
+        rec_res, elapse = self.text_recognizer(img_crop_list)
+        # logger.debug("rec_res num  : {}, elapsed : {}".format(len(rec_res), elapse))
+        filter_boxes, filter_rec_res = [], []
+        for box, rec_result in zip(dt_boxes, rec_res):
+            text, score = rec_result
+            if score >= self.drop_score:
+                filter_boxes.append(box)
+                filter_rec_res.append(rec_result)
+        return filter_boxes, filter_rec_res
+if __name__ == '__main__':
+    pytorch_paddle_ocr = PytorchPaddleOCR()
+    img = cv2.imread("/Users/myhloli/Downloads/screenshot-20250326-194348.png")
+    dt_boxes, rec_res = pytorch_paddle_ocr(img)
+    ocr_res = []
+    if not dt_boxes and not rec_res:
+        ocr_res.append(None)
+    else:
+        tmp_res = [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
+        ocr_res.append(tmp_res)
+    print(ocr_res)

magic-pdf 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

magic-pdf 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl