PyPI - mineru - Versions diffs - 2.5.3__py3-none-any.whl → 2.6.0__py3-none-any.whl - Mend

mineru 2.5.3__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104)

mineru/model/ocr/paddleocr2pytorch/pytorch_paddle.py CHANGED Viewed

@@ -12,27 +12,114 @@ from loguru import logger
 from mineru.utils.config_reader import get_device
 from mineru.utils.enum_class import ModelPath
 from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
-from ....utils.ocr_utils import check_img, preprocess_image, sorted_boxes, merge_det_boxes, update_det_boxes, get_rotate_crop_image
-from .tools.infer.predict_system import TextSystem
-from .tools.infer import pytorchocr_utility as utility
+from mineru.utils.ocr_utils import check_img, preprocess_image, sorted_boxes, merge_det_boxes, update_det_boxes, get_rotate_crop_image
+from mineru.model.utils.tools.infer.predict_system import TextSystem
+from mineru.model.utils.tools.infer import pytorchocr_utility as utility
 import argparse
 latin_lang = [
-        'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr',  # noqa: E126
-        'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl',
-        'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv',
-        'sw', 'tl', 'tr', 'uz', 'vi', 'french', 'german'
+        "af",
+        "az",
+        "bs",
+        "cs",
+        "cy",
+        "da",
+        "de",
+        "es",
+        "et",
+        "fr",
+        "ga",
+        "hr",
+        "hu",
+        "id",
+        "is",
+        "it",
+        "ku",
+        "la",
+        "lt",
+        "lv",
+        "mi",
+        "ms",
+        "mt",
+        "nl",
+        "no",
+        "oc",
+        "pi",
+        "pl",
+        "pt",
+        "ro",
+        "rs_latin",
+        "sk",
+        "sl",
+        "sq",
+        "sv",
+        "sw",
+        "tl",
+        "tr",
+        "uz",
+        "vi",
+        "french",
+        "german",
+        "fi",
+        "eu",
+        "gl",
+        "lb",
+        "rm",
+        "ca",
+        "qu",
 ]
-arabic_lang = ['ar', 'fa', 'ug', 'ur']
+arabic_lang = ["ar", "fa", "ug", "ur", "ps", "ku", "sd", "bal"]
 cyrillic_lang = [
-        'rs_cyrillic', 'bg', 'mn', 'abq', 'ady', 'kbd', 'ava',  # noqa: E126
-        'dar', 'inh', 'che', 'lbe', 'lez', 'tab'
+        "ru",
+        "rs_cyrillic",
+        "be",
+        "bg",
+        "uk",
+        "mn",
+        "abq",
+        "ady",
+        "kbd",
+        "ava",
+        "dar",
+        "inh",
+        "che",
+        "lbe",
+        "lez",
+        "tab",
+        "kk",
+        "ky",
+        "tg",
+        "mk",
+        "tt",
+        "cv",
+        "ba",
+        "mhr",
+        "mo",
+        "udm",
+        "kv",
+        "os",
+        "bua",
+        "xal",
+        "tyv",
+        "sah",
+        "kaa",
 ]
 east_slavic_lang = ["ru", "be", "uk"]
 devanagari_lang = [
-        'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom',  # noqa: E126
-        'sa', 'bgc'
+        "hi",
+        "mr",
+        "ne",
+        "bh",
+        "mai",
+        "ang",
+        "bho",
+        "mah",
+        "sck",
+        "new",
+        "gom",
+        "sa",
+        "bgc",
 ]
@@ -47,7 +134,7 @@ def get_model_params(lang, config):
         raise Exception (f'Language {lang} not supported')
-root_dir = Path(__file__).resolve().parent
+root_dir = os.path.join(Path(__file__).resolve().parent.parent.parent, 'utils')
 class PytorchPaddleOCR(TextSystem):
@@ -65,14 +152,14 @@ class PytorchPaddleOCR(TextSystem):
         if self.lang in latin_lang:
             self.lang = 'latin'
+        elif self.lang in east_slavic_lang:
+            self.lang = 'east_slavic'
         elif self.lang in arabic_lang:
             self.lang = 'arabic'
         elif self.lang in cyrillic_lang:
             self.lang = 'cyrillic'
         elif self.lang in devanagari_lang:
             self.lang = 'devanagari'
-        elif self.lang in east_slavic_lang:
-            self.lang = 'east_slavic'
         else:
             pass
@@ -89,7 +176,7 @@ class PytorchPaddleOCR(TextSystem):
         kwargs['det_model_path'] = det_model_path
         kwargs['rec_model_path'] = rec_model_path
         kwargs['rec_char_dict_path'] = os.path.join(root_dir, 'pytorchocr', 'utils', 'resources', 'dict', dict_file)
-        kwargs['rec_batch_num'] = 8
+        kwargs['rec_batch_num'] = 6
         kwargs['device'] = device

mineru/model/table/rec/unet_table/main.py CHANGED Viewed

@@ -184,7 +184,7 @@ class WiredTableRecognition:
                 continue
             # 从img中截取对应的区域
             x1, y1, x2, y2 = int(box[0][0])+1, int(box[0][1])+1, int(box[2][0])-1, int(box[2][1])-1
-            if x1 >= x2 or y1 >= y2:
+            if x1 >= x2 or y1 >= y2 or x1 < 0 or y1 < 0:
                 # logger.warning(f"Invalid box coordinates: {x1, y1, x2, y2}")
                 continue
             # 判断长宽比

mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/data/imaug/operators.py RENAMED Viewed

@@ -23,6 +23,7 @@ import sys
 import six
 import cv2
 import numpy as np
+from PIL import Image
 class DecodeImage(object):
@@ -104,16 +105,15 @@ class NormalizeImage(object):
         shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
         self.mean = np.array(mean).reshape(shape).astype('float32')
         self.std = np.array(std).reshape(shape).astype('float32')
+        self.scale = self.scale / self.std
+        self.mean = self.mean / self.std
     def __call__(self, data):
         img = data['image']
-        from PIL import Image
         if isinstance(img, Image.Image):
             img = np.array(img)
-        assert isinstance(img,
-                          np.ndarray), "invalid input 'img' in NormalizeImage"
-        data['image'] = (
-            img.astype('float32') * self.scale - self.mean) / self.std
+        data['image'] = img.astype('float32') * self.scale - self.mean
         return data

mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/__init__.py RENAMED Viewed

@@ -37,7 +37,7 @@ def build_backbone(config, model_type):
         from .rec_mobilenet_v3 import MobileNetV3
         from .rec_svtrnet import SVTRNet
         from .rec_mv1_enhance import MobileNetV1Enhance
-        from .rec_pphgnetv2 import PPHGNetV2_B4
+        from .rec_pphgnetv2 import PPHGNetV2_B4, PPHGNetV2_B6_Formula
         support_dict = [
             "MobileNetV1Enhance",
             "MobileNetV3",
@@ -51,6 +51,7 @@ def build_backbone(config, model_type):
             "PPLCNetV3",
             "PPHGNet_small",
             "PPHGNetV2_B4",
+            "PPHGNetV2_B6_Formula"
         ]
     else:
         raise NotImplementedError

mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_lcnetv3.py RENAMED Viewed

@@ -245,18 +245,18 @@ class LearnableRepLayer(nn.Module):
             return 0, 0
         elif isinstance(branch, ConvBNLayer):
             kernel = branch.conv.weight
-            running_mean = branch.bn._mean
-            running_var = branch.bn._variance
+            running_mean = branch.bn.running_mean
+            running_var = branch.bn.running_var
             gamma = branch.bn.weight
             beta = branch.bn.bias
-            eps = branch.bn._epsilon
+            eps = branch.bn.eps
         else:
             assert isinstance(branch, nn.BatchNorm2d)
             if not hasattr(self, "id_tensor"):
                 input_dim = self.in_channels // self.groups
                 kernel_value = torch.zeros(
                     (self.in_channels, input_dim, self.kernel_size, self.kernel_size),
-                    dtype=branch.weight.dtype,
+                    dtype=branch.weight.dtype, device=branch.weight.device,
                 )
                 for i in range(self.in_channels):
                     kernel_value[
@@ -264,11 +264,11 @@ class LearnableRepLayer(nn.Module):
                     ] = 1
                 self.id_tensor = kernel_value
             kernel = self.id_tensor
-            running_mean = branch._mean
-            running_var = branch._variance
+            running_mean = branch.running_mean
+            running_var = branch.running_var
             gamma = branch.weight
             beta = branch.bias
-            eps = branch._epsilon
+            eps = branch.eps
         std = (running_var + eps).sqrt()
         t = (gamma / std).reshape((-1, 1, 1, 1))
         return kernel * t, beta - running_mean * gamma / std

mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/backbones/rec_pphgnetv2.py RENAMED Viewed

@@ -1626,8 +1626,8 @@ class PPHGNetV2_B6_Formula(nn.Module):
             pixel_values = torch.repeat_interleave(pixel_values, repeats=3, dim=1)
         pphgnet_b6_output = self.pphgnet_b6(pixel_values)
         b, c, h, w = pphgnet_b6_output.shape
-        pphgnet_b6_output = pphgnet_b6_output.reshape([b, c, h * w]).transpose(
-            [0, 2, 1]
+        pphgnet_b6_output = pphgnet_b6_output.reshape([b, c, h * w]).permute(
+            0, 2, 1
         )
         pphgnet_b6_output = DonutSwinModelOutput(
             last_hidden_state=pphgnet_b6_output,

mineru/model/{ocr/paddleocr2pytorch → utils}/pytorchocr/modeling/heads/__init__.py RENAMED Viewed

@@ -22,6 +22,7 @@ def build_head(config, **kwargs):
     # rec head
     from .rec_ctc_head import CTCHead
     from .rec_multi_head import MultiHead
+    from .rec_ppformulanet_head import PPFormulaNet_Head
     # cls head
     from .cls_head import ClsHead
@@ -32,6 +33,7 @@ def build_head(config, **kwargs):
         "ClsHead",
         "MultiHead",
         "PFHeadLocal",
+        "PPFormulaNet_Head",
     ]
     module_name = config.pop("name")

mineru 2.5.3__py3-none-any.whl → 2.6.0__py3-none-any.whl

mineru 2.5.3__py3-none-any.whl → 2.6.0__py3-none-any.whl