PyPI - mineru - Versions diffs - 2.5.4__py3-none-any.whl → 2.6.1__py3-none-any.whl - Mend

mineru 2.5.4__py3-none-any.whl → 2.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (103) hide show

mineru/cli/gradio_app.py CHANGED Viewed

@@ -134,20 +134,107 @@ with open(header_path, 'r') as header_file:
 latin_lang = [
-        'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr',  # noqa: E126
-        'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl',
-        'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv',
-        'sw', 'tl', 'tr', 'uz', 'vi', 'french', 'german'
+        "af",
+        "az",
+        "bs",
+        "cs",
+        "cy",
+        "da",
+        "de",
+        "es",
+        "et",
+        "fr",
+        "ga",
+        "hr",
+        "hu",
+        "id",
+        "is",
+        "it",
+        "ku",
+        "la",
+        "lt",
+        "lv",
+        "mi",
+        "ms",
+        "mt",
+        "nl",
+        "no",
+        "oc",
+        "pi",
+        "pl",
+        "pt",
+        "ro",
+        "rs_latin",
+        "sk",
+        "sl",
+        "sq",
+        "sv",
+        "sw",
+        "tl",
+        "tr",
+        "uz",
+        "vi",
+        "french",
+        "german",
+        "fi",
+        "eu",
+        "gl",
+        "lb",
+        "rm",
+        "ca",
+        "qu",
 ]
-arabic_lang = ['ar', 'fa', 'ug', 'ur']
+arabic_lang = ["ar", "fa", "ug", "ur", "ps", "ku", "sd", "bal"]
 cyrillic_lang = [
-        'rs_cyrillic', 'bg', 'mn', 'abq', 'ady', 'kbd', 'ava',  # noqa: E126
-        'dar', 'inh', 'che', 'lbe', 'lez', 'tab'
+        "ru",
+        "rs_cyrillic",
+        "be",
+        "bg",
+        "uk",
+        "mn",
+        "abq",
+        "ady",
+        "kbd",
+        "ava",
+        "dar",
+        "inh",
+        "che",
+        "lbe",
+        "lez",
+        "tab",
+        "kk",
+        "ky",
+        "tg",
+        "mk",
+        "tt",
+        "cv",
+        "ba",
+        "mhr",
+        "mo",
+        "udm",
+        "kv",
+        "os",
+        "bua",
+        "xal",
+        "tyv",
+        "sah",
+        "kaa",
 ]
 east_slavic_lang = ["ru", "be", "uk"]
 devanagari_lang = [
-        'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom',  # noqa: E126
-        'sa', 'bgc'
+        "hi",
+        "mr",
+        "ne",
+        "bh",
+        "mai",
+        "ang",
+        "bho",
+        "mah",
+        "sck",
+        "new",
+        "gom",
+        "sa",
+        "bgc",
 ]
 other_lang = ['ch', 'ch_lite', 'ch_server', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka', "el", "th"]
 add_lang = ['latin', 'arabic', 'east_slavic', 'cyrillic', 'devanagari']

mineru/cli/models_download.py CHANGED Viewed

@@ -70,6 +70,7 @@ def download_pipeline_models():
         ModelPath.unet_structure,
         ModelPath.paddle_table_cls,
         ModelPath.paddle_orientation_classification,
+        ModelPath.pp_formulanet_plus_m,
     ]
     download_finish_path = ""
     for model_path in model_paths:

mineru/model/mfr/pp_formulanet_plus_m/predict_formula.py ADDED Viewed

@@ -0,0 +1,152 @@
+import os
+import torch
+import yaml
+from pathlib import Path
+from loguru import logger
+from tqdm import tqdm
+from mineru.model.utils.tools.infer import pytorchocr_utility
+from mineru.model.utils.pytorchocr.base_ocr_v20 import BaseOCRV20
+from .processors import (
+    UniMERNetImgDecode,
+    UniMERNetTestTransform,
+    LatexImageFormat,
+    ToBatch,
+    UniMERNetDecode,
+)
+class FormulaRecognizer(BaseOCRV20):
+    """Formula recognizer built on the PP-FormulaNet_plus-M PyTorch weights.
+    The network is constructed by ``BaseOCRV20`` from an architecture YAML that
+    ships with the package; the weights and the inference YAML (which holds the
+    post-processing character dictionary) are read from ``weight_dir``.
+    """
+    def __init__(
+        self,
+        weight_dir,
+        device="cpu",
+    ):
+        """Load the network, move it to ``device`` and set up pre/post-processing.
+        Args:
+            weight_dir: Directory containing ``PP-FormulaNet_plus-M.pth`` and
+                ``PP-FormulaNet_plus-M_inference.yml``.
+            device: Either a ``torch.device`` or a device string (e.g. ``"cpu"``).
+        """
+        self.weights_path = os.path.join(
+            weight_dir,
+            "PP-FormulaNet_plus-M.pth",
+        )
+        # The architecture config is resolved relative to this source file:
+        # three directories up, then utils/pytorchocr/utils/resources/.
+        self.yaml_path = os.path.join(
+            Path(__file__).parent.parent.parent,
+            "utils",
+            "pytorchocr",
+            "utils",
+            "resources",
+            "pp_formulanet_arch_config.yaml"
+        )
+        self.infer_yaml_path = os.path.join(
+            weight_dir,
+            "PP-FormulaNet_plus-M_inference.yml",
+        )
+        network_config = pytorchocr_utility.AnalysisConfig(
+            self.weights_path, self.yaml_path
+        )
+        # Weights are read before the base-class constructor runs, exactly as
+        # in the original statement order.
+        weights = self.read_pytorch_weights(self.weights_path)
+        super(FormulaRecognizer, self).__init__(network_config)
+        self.load_state_dict(weights)
+        self.device = torch.device(device) if isinstance(device, str) else device
+        self.net.to(self.device)
+        self.net.eval()
+        # NOTE(review): FullLoader can build more than plain data types. If this
+        # YAML could ever come from an untrusted location, prefer yaml.safe_load.
+        with open(self.infer_yaml_path, "r", encoding="utf-8") as yaml_file:
+            data = yaml.load(yaml_file, Loader=yaml.FullLoader)
+        # Pre-processing steps, applied in this order by predict().
+        self.pre_tfs = {
+            "UniMERNetImgDecode": UniMERNetImgDecode(input_size=(384, 384)),
+            "UniMERNetTestTransform": UniMERNetTestTransform(),
+            "LatexImageFormat": LatexImageFormat(),
+            "ToBatch": ToBatch(),
+        }
+        self.post_op = UniMERNetDecode(
+            character_list=data["PostProcess"]["character_dict"]
+        )
+    def predict(self, img_list, batch_size: int = 64):
+        """Run formula recognition on a list of cropped formula images.
+        Args:
+            img_list: Images handed to the ``UniMERNetImgDecode`` pre-processor.
+            batch_size: Requested batch size. Half of it (but never less than
+                one) is used per forward pass.
+        Returns:
+            The concatenated outputs of ``self.post_op`` over all batches, in
+            the same order as ``img_list``.
+        """
+        # Reduce batch size by 50% to avoid potential memory issues during
+        # inference. Clamp to 1: halving a batch size of 1 would give 0, and a
+        # zero step makes range() raise ValueError (batch_predict requests a
+        # batch size of 1 when exactly one formula is detected).
+        batch_size = max(1, int(0.5 * batch_size))
+        batch_imgs = self.pre_tfs["UniMERNetImgDecode"](imgs=img_list)
+        batch_imgs = self.pre_tfs["UniMERNetTestTransform"](imgs=batch_imgs)
+        batch_imgs = self.pre_tfs["LatexImageFormat"](imgs=batch_imgs)
+        inp = self.pre_tfs["ToBatch"](imgs=batch_imgs)
+        inp = torch.from_numpy(inp[0])
+        inp = inp.to(self.device)
+        rec_formula = []
+        with torch.no_grad():
+            with tqdm(total=len(inp), desc="MFR Predict") as pbar:
+                for index in range(0, len(inp), batch_size):
+                    batch_data = inp[index: index + batch_size]
+                    # with torch.amp.autocast(device_type=self.device.type):
+                    #     batch_preds = [self.net(batch_data)]
+                    outputs = self.net(batch_data)
+                    # Flatten each per-sample prediction and move it to host
+                    # memory as a NumPy array for the decoder.
+                    batch_preds = [p.reshape([-1]).cpu().numpy() for p in outputs]
+                    rec_formula += self.post_op(batch_preds)
+                    pbar.update(len(batch_preds))
+        return rec_formula
+    def batch_predict(
+        self, images_mfd_res: list, images: list, batch_size: int = 64
+    ) -> list:
+        """Recognize every detected formula across a batch of page images.
+        Args:
+            images_mfd_res: Per-image detection results; each must expose
+                ``boxes.xyxy``, ``boxes.conf`` and ``boxes.cls``.
+            images: Page images, index-aligned with ``images_mfd_res`` and
+                sliceable as ``image[ymin:ymax, xmin:xmax]``.
+            batch_size: Upper bound on the batch size passed to ``predict``.
+        Returns:
+            One list per input image containing a dict per detected formula
+            with keys ``category_id``, ``poly``, ``score`` and ``latex``.
+        """
+        images_formula_list = []
+        backfill_list = []  # every formula dict, in detection order
+        image_info = []  # (area, original_index, image) tuples
+        # Collect crops together with their position in detection order.
+        for image_index, mfd_res in enumerate(images_mfd_res):
+            image = images[image_index]
+            formula_list = []
+            for xyxy, conf, cla in zip(
+                mfd_res.boxes.xyxy, mfd_res.boxes.conf, mfd_res.boxes.cls
+            ):
+                xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy]
+                new_item = {
+                    "category_id": 13 + int(cla.item()),
+                    # Corners: top-left, top-right, bottom-right, bottom-left.
+                    "poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax],
+                    "score": round(float(conf.item()), 2),
+                    "latex": "",
+                }
+                formula_list.append(new_item)
+                bbox_img = image[ymin:ymax, xmin:xmax]
+                area = (xmax - xmin) * (ymax - ymin)
+                # len(image_info) is the running index of this crop, which is
+                # also its index in backfill_list once formula_list is merged.
+                image_info.append((area, len(image_info), bbox_img))
+            images_formula_list.append(formula_list)
+            backfill_list += formula_list
+        if not image_info:
+            # Nothing detected anywhere: no inference needed.
+            return images_formula_list
+        # Stable sort by area so each batch holds crops of similar size.
+        image_info.sort(key=lambda x: x[0])
+        sorted_images = [x[2] for x in image_info]
+        # Run prediction. Cap the batch size at the largest power of two that
+        # does not exceed the number of crops.
+        batch_size = min(batch_size, max(1, 2 ** (len(sorted_images).bit_length() - 1)))
+        rec_formula = self.predict(sorted_images, batch_size)
+        # Restore original order: write each result back to the formula dict
+        # the crop came from.
+        for (_, original_idx, _), latex in zip(image_info, rec_formula):
+            backfill_list[original_idx]["latex"] = latex
+        return images_formula_list

mineru 2.5.4__py3-none-any.whl → 2.6.1__py3-none-any.whl

mineru 2.5.4__py3-none-any.whl → 2.6.1__py3-none-any.whl