paddlex 3.0.0rc1__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/__init__.py +1 -1
- paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
- paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
- paddlex/configs/pipelines/OCR.yaml +7 -6
- paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
- paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
- paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
- paddlex/configs/pipelines/doc_understanding.yaml +1 -1
- paddlex/configs/pipelines/formula_recognition.yaml +2 -2
- paddlex/configs/pipelines/layout_parsing.yaml +3 -2
- paddlex/configs/pipelines/seal_recognition.yaml +1 -0
- paddlex/configs/pipelines/table_recognition.yaml +2 -1
- paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
- paddlex/hpip_links.html +20 -20
- paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +33 -10
- paddlex/inference/common/batch_sampler/image_batch_sampler.py +34 -25
- paddlex/inference/common/result/mixin.py +19 -12
- paddlex/inference/models/base/predictor/base_predictor.py +2 -8
- paddlex/inference/models/common/static_infer.py +11 -59
- paddlex/inference/models/common/tokenizer/__init__.py +2 -0
- paddlex/inference/models/common/tokenizer/clip_tokenizer.py +1 -1
- paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +2 -2
- paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
- paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +7 -1
- paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
- paddlex/inference/models/common/tokenizer/tokenizer_utils.py +13 -13
- paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3 -3
- paddlex/inference/models/common/tokenizer/vocab.py +7 -7
- paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
- paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
- paddlex/inference/models/common/vlm/generation/configuration_utils.py +1 -1
- paddlex/inference/models/common/vlm/generation/logits_process.py +1 -1
- paddlex/inference/models/common/vlm/generation/utils.py +1 -1
- paddlex/inference/models/common/vlm/transformers/configuration_utils.py +3 -3
- paddlex/inference/models/common/vlm/transformers/conversion_utils.py +3 -3
- paddlex/inference/models/common/vlm/transformers/model_outputs.py +2 -2
- paddlex/inference/models/common/vlm/transformers/model_utils.py +7 -31
- paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
- paddlex/inference/models/doc_vlm/modeling/__init__.py +2 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +0 -105
- paddlex/inference/models/doc_vlm/predictor.py +79 -24
- paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
- paddlex/inference/models/doc_vlm/processors/__init__.py +2 -0
- paddlex/inference/models/doc_vlm/processors/common.py +189 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +21 -176
- paddlex/inference/models/formula_recognition/predictor.py +7 -1
- paddlex/inference/models/formula_recognition/processors.py +92 -79
- paddlex/inference/models/formula_recognition/result.py +28 -27
- paddlex/inference/models/image_feature/processors.py +3 -4
- paddlex/inference/models/keypoint_detection/predictor.py +3 -0
- paddlex/inference/models/object_detection/predictor.py +2 -0
- paddlex/inference/models/object_detection/processors.py +28 -3
- paddlex/inference/models/object_detection/utils.py +2 -0
- paddlex/inference/models/table_structure_recognition/result.py +0 -10
- paddlex/inference/models/text_detection/predictor.py +8 -0
- paddlex/inference/models/text_detection/processors.py +44 -10
- paddlex/inference/models/text_detection/result.py +0 -10
- paddlex/inference/pipelines/__init__.py +9 -5
- paddlex/inference/pipelines/_parallel.py +172 -0
- paddlex/inference/pipelines/anomaly_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/attribute_recognition/pipeline.py +11 -1
- paddlex/inference/pipelines/base.py +14 -4
- paddlex/inference/pipelines/components/faisser.py +1 -1
- paddlex/inference/pipelines/doc_preprocessor/pipeline.py +53 -27
- paddlex/inference/pipelines/formula_recognition/pipeline.py +120 -82
- paddlex/inference/pipelines/formula_recognition/result.py +1 -11
- paddlex/inference/pipelines/image_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/instance_segmentation/pipeline.py +16 -6
- paddlex/inference/pipelines/keypoint_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/layout_parsing/pipeline.py +34 -47
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +893 -260
- paddlex/inference/pipelines/layout_parsing/result.py +4 -17
- paddlex/inference/pipelines/layout_parsing/result_v2.py +523 -245
- paddlex/inference/pipelines/layout_parsing/setting.py +87 -0
- paddlex/inference/pipelines/layout_parsing/utils.py +565 -1998
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1144 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +563 -0
- paddlex/inference/pipelines/m_3d_bev_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +2 -2
- paddlex/inference/pipelines/object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/ocr/pipeline.py +127 -70
- paddlex/inference/pipelines/ocr/result.py +19 -16
- paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +2 -2
- paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +2 -2
- paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +2 -5
- paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +5 -5
- paddlex/inference/pipelines/rotated_object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/seal_recognition/pipeline.py +109 -53
- paddlex/inference/pipelines/semantic_segmentation/pipeline.py +16 -6
- paddlex/inference/pipelines/small_object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/table_recognition/pipeline.py +26 -18
- paddlex/inference/pipelines/table_recognition/pipeline_v2.py +624 -53
- paddlex/inference/pipelines/table_recognition/result.py +1 -1
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +9 -5
- paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/ts_classification/pipeline.py +2 -2
- paddlex/inference/pipelines/ts_forecasting/pipeline.py +2 -2
- paddlex/inference/pipelines/video_classification/pipeline.py +2 -2
- paddlex/inference/pipelines/video_detection/pipeline.py +2 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +5 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +0 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +0 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +6 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +1 -5
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -5
- paddlex/inference/serving/infra/utils.py +20 -22
- paddlex/inference/serving/schemas/formula_recognition.py +1 -1
- paddlex/inference/serving/schemas/layout_parsing.py +1 -2
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +1 -2
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +2 -2
- paddlex/inference/serving/schemas/pp_structurev3.py +10 -6
- paddlex/inference/serving/schemas/seal_recognition.py +1 -1
- paddlex/inference/serving/schemas/table_recognition.py +2 -6
- paddlex/inference/serving/schemas/table_recognition_v2.py +5 -6
- paddlex/inference/utils/hpi.py +8 -1
- paddlex/inference/utils/hpi_model_info_collection.json +81 -2
- paddlex/inference/utils/io/readers.py +12 -12
- paddlex/inference/utils/mkldnn_blocklist.py +25 -0
- paddlex/inference/utils/official_models.py +14 -0
- paddlex/inference/utils/pp_option.py +29 -8
- paddlex/model.py +2 -2
- paddlex/modules/__init__.py +1 -1
- paddlex/modules/anomaly_detection/evaluator.py +2 -2
- paddlex/modules/base/__init__.py +1 -1
- paddlex/modules/base/evaluator.py +5 -5
- paddlex/modules/base/trainer.py +1 -1
- paddlex/modules/doc_vlm/dataset_checker.py +2 -2
- paddlex/modules/doc_vlm/evaluator.py +2 -2
- paddlex/modules/doc_vlm/exportor.py +2 -2
- paddlex/modules/doc_vlm/model_list.py +1 -1
- paddlex/modules/doc_vlm/trainer.py +2 -2
- paddlex/modules/face_recognition/evaluator.py +2 -2
- paddlex/modules/formula_recognition/evaluator.py +5 -2
- paddlex/modules/formula_recognition/model_list.py +3 -0
- paddlex/modules/formula_recognition/trainer.py +3 -0
- paddlex/modules/general_recognition/evaluator.py +1 -1
- paddlex/modules/image_classification/evaluator.py +2 -2
- paddlex/modules/image_classification/model_list.py +1 -0
- paddlex/modules/instance_segmentation/evaluator.py +1 -1
- paddlex/modules/keypoint_detection/evaluator.py +1 -1
- paddlex/modules/m_3d_bev_detection/evaluator.py +2 -2
- paddlex/modules/multilabel_classification/evaluator.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +4 -4
- paddlex/modules/object_detection/evaluator.py +2 -2
- paddlex/modules/object_detection/model_list.py +2 -0
- paddlex/modules/semantic_segmentation/evaluator.py +2 -2
- paddlex/modules/table_recognition/evaluator.py +2 -2
- paddlex/modules/text_detection/evaluator.py +2 -2
- paddlex/modules/text_detection/model_list.py +2 -0
- paddlex/modules/text_recognition/evaluator.py +2 -2
- paddlex/modules/text_recognition/model_list.py +2 -0
- paddlex/modules/ts_anomaly_detection/evaluator.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/ts_classification/evaluator.py +2 -2
- paddlex/modules/ts_forecast/evaluator.py +2 -2
- paddlex/modules/video_classification/evaluator.py +2 -2
- paddlex/modules/video_detection/evaluator.py +2 -2
- paddlex/ops/__init__.py +2 -2
- paddlex/paddlex_cli.py +19 -13
- paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +2 -2
- paddlex/repo_apis/PaddleClas_api/cls/config.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/model.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/register.py +10 -0
- paddlex/repo_apis/PaddleClas_api/cls/runner.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/config.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/model.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +25 -0
- paddlex/repo_apis/PaddleDetection_api/object_det/register.py +30 -0
- paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +5 -9
- paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +27 -0
- paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/model.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/register.py +18 -0
- paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +5 -9
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +18 -0
- paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/model.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/runner.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +3 -3
- paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +4 -4
- paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/config.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/model.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +1 -1
- paddlex/repo_apis/base/config.py +1 -1
- paddlex/repo_manager/core.py +3 -3
- paddlex/repo_manager/meta.py +6 -2
- paddlex/repo_manager/repo.py +17 -16
- paddlex/utils/custom_device_list.py +26 -2
- paddlex/utils/deps.py +1 -1
- paddlex/utils/device.py +15 -8
- paddlex/utils/env.py +4 -0
- paddlex/utils/flags.py +2 -4
- paddlex/utils/fonts/__init__.py +34 -4
- paddlex/utils/misc.py +1 -1
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/METADATA +52 -56
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/RECORD +233 -206
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/WHEEL +1 -1
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/entry_points.txt +0 -0
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/licenses/LICENSE +0 -0
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/top_level.txt +0 -0
paddlex/inference/models/doc_vlm/processors/qwen2_vl.py:

@@ -12,15 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import base64
-import math
-from io import BytesIO
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Dict, List, Optional, Union
 
 import numpy as np
-import paddle
-import requests
-from PIL import Image
 
 from .....utils import logging
 from ....utils.benchmark import benchmark
@@ -33,10 +27,12 @@ from .common import (
     TensorType,
     TextInput,
     convert_to_rgb,
+    fetch_image,
     get_image_size,
     infer_channel_dimension_format,
-    is_valid_image,
+    make_batched_images,
     make_list_of_images,
+    smart_resize,
     to_channel_dimension_format,
     to_numpy_array,
     valid_images,
@@ -82,7 +78,7 @@ class Qwen2VLProcessor(object):
         self.image_processor.min_pixels = kwargs.get("min_pixels", 3136)
         self.image_processor.max_pixels = kwargs.get("max_pixels", 12845056)
 
-    def __call__(
+    def preprocess(
         self,
         images: ImageInput = None,
         text: Union[TextInput, List[TextInput]] = None,
@@ -182,33 +178,6 @@ class Qwen2VLProcessor(object):
         return self.tokenizer.decode(*args, **kwargs)
 
 
-def make_batched_images(images) -> List[List[ImageInput]]:
-    """
-    Accepts images in list or nested list format, and makes a list of images for preprocessing.
-
-    Args:
-        images (`Union[List[List[ImageInput]], List[ImageInput], ImageInput]`):
-            The input image.
-
-    Returns:
-        list: A list of images.
-    """
-    if (
-        isinstance(images, (list, tuple))
-        and isinstance(images[0], (list, tuple))
-        and is_valid_image(images[0][0])
-    ):
-        return [img for img_list in images for img in img_list]
-
-    elif isinstance(images, (list, tuple)) and is_valid_image(images[0]):
-        return images
-
-    elif is_valid_image(images):
-        return [images]
-
-    raise ValueError(f"Could not make batched images from {images}")
-
-
 class Qwen2VLImageProcessor(object):
     r"""
     Constructs a Qwen2-VL image processor that dynamically resizes images based on the original images.
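The removed `make_batched_images` helper is not gone: per the import hunk above, 3.0.1 now imports it from `processors/common.py`. For reference, a self-contained sketch of the same flattening contract, where `valid` stands in for the library's `is_valid_image` check:

```python
# Minimal sketch of make_batched_images' flattening contract.
# `valid` is a stand-in for is_valid_image; here it only tests for PIL images.
from PIL import Image

def valid(obj):
    return isinstance(obj, Image.Image)

def make_batched(images):
    # nested list of images -> flat list
    if (
        isinstance(images, (list, tuple))
        and isinstance(images[0], (list, tuple))
        and valid(images[0][0])
    ):
        return [img for img_list in images for img in img_list]
    # already a flat list of images
    if isinstance(images, (list, tuple)) and valid(images[0]):
        return list(images)
    # a single image -> one-element list
    if valid(images):
        return [images]
    raise ValueError(f"Could not make batched images from {images}")

a, b = Image.new("RGB", (8, 8)), Image.new("RGB", (8, 8))
assert make_batched([[a], [b]]) == [a, b]
assert make_batched([a, b]) == [a, b]
assert make_batched(a) == [a]
```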
@@ -360,6 +329,7 @@ class Qwen2VLImageProcessor(object):
             factor=self.patch_size * self.merge_size,
             min_pixels=self.min_pixels,
             max_pixels=self.max_pixels,
+            max_ratio=MAX_RATIO,
         )
         image = image.astype("uint8")
         image = resize(
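The new `max_ratio` keyword threads the aspect-ratio guard into the resize call. A minimal sketch of the guard, grounded in the `smart_resize` check visible in the next hunk; the value `MAX_RATIO = 200` is an assumption, since the constant's definition is outside this diff:

```python
MAX_RATIO = 200  # assumed value; not shown in this diff

def check_aspect_ratio(height: int, width: int, max_ratio: float = MAX_RATIO) -> None:
    # mirrors the guard at the top of smart_resize below
    ratio = max(height, width) / min(height, width)
    if ratio > max_ratio:
        raise ValueError(
            f"absolute aspect ratio must be smaller than {max_ratio}, got {ratio}"
        )

check_aspect_ratio(1080, 1920)   # fine
# check_aspect_ratio(10, 4000)   # would raise: ratio 400 exceeds the cap
```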
@@ -527,159 +497,34 @@
         return self.preprocess(images, **kwargs)
 
 
-def round_by_factor(number: int, factor: int) -> int:
-    """Returns the closest integer to 'number' that is divisible by 'factor'."""
-    return round(number / factor) * factor
-
-
-def ceil_by_factor(number: int, factor: int) -> int:
-    """Returns the smallest integer greater than or equal to 'number' that is divisible by 'factor'."""
-    return math.ceil(number / factor) * factor
-
-
-def floor_by_factor(number: int, factor: int) -> int:
-    """Returns the largest integer less than or equal to 'number' that is divisible by 'factor'."""
-    return math.floor(number / factor) * factor
-
-
-def smart_resize(
-    height: int,
-    width: int,
-    factor: int = IMAGE_FACTOR,
-    min_pixels: int = MIN_PIXELS,
-    max_pixels: int = MAX_PIXELS,
-) -> Tuple[int, int]:
-    """
-    Rescales the image so that the following conditions are met:
-
-    1. Both dimensions (height and width) are divisible by 'factor'.
-
-    2. The total number of pixels is within the range ['min_pixels', 'max_pixels'].
-
-    3. The aspect ratio of the image is maintained as closely as possible.
-    """
-    if max(height, width) / min(height, width) > MAX_RATIO:
-        raise ValueError(
-            f"absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(height, width) / min(height, width)}"
-        )
-    h_bar = max(factor, round_by_factor(height, factor))
-    w_bar = max(factor, round_by_factor(width, factor))
-    if h_bar * w_bar > max_pixels:
-        beta = math.sqrt((height * width) / max_pixels)
-        h_bar = floor_by_factor(height / beta, factor)
-        w_bar = floor_by_factor(width / beta, factor)
-    elif h_bar * w_bar < min_pixels:
-        beta = math.sqrt(min_pixels / (height * width))
-        h_bar = ceil_by_factor(height * beta, factor)
-        w_bar = ceil_by_factor(width * beta, factor)
-    return h_bar, w_bar
-
-
-def fetch_image(
-    ele: Dict[str, Union[str, Image.Image]], size_factor: int = IMAGE_FACTOR
-) -> Image.Image:
-    if not isinstance(ele, dict):
-        ele = {"image": ele}
-    if "image" in ele:
-        image = ele["image"]
-    else:
-        image = ele["image_url"]
-    image_obj = None
-    if isinstance(image, Image.Image):
-        image_obj = image
-    elif isinstance(image, np.ndarray):
-        image_obj = Image.fromarray(image)
-    elif image.startswith("http://") or image.startswith("https://"):
-        image_obj = Image.open(requests.get(image, stream=True).raw)
-    elif image.startswith("file://"):
-        image_obj = Image.open(image[7:])
-    elif image.startswith("data:image"):
-        data = image.split(";", 1)[1]
-        if data.startswith("base64,"):
-            data = base64.b64decode(data[7:])
-        image_obj = Image.open(BytesIO(data))
-    else:
-        image_obj = Image.open(image)
-    if image_obj is None:
-        raise ValueError(
-            f"Unrecognized image input, support local path, http url, base64 and PIL.Image, got {image}"
-        )
-    image = image_obj.convert("RGB")
-    # resize
-    if "resized_height" in ele and "resized_width" in ele:
-        resized_height, resized_width = smart_resize(
-            ele["resized_height"],
-            ele["resized_width"],
-            factor=size_factor,
-        )
-    else:
-        width, height = image.size  # Image, not tensor
-        min_pixels = ele.get("min_pixels", MIN_PIXELS)
-        max_pixels = ele.get("max_pixels", MAX_PIXELS)
-        resized_height, resized_width = smart_resize(
-            height,
-            width,
-            factor=size_factor,
-            min_pixels=min_pixels,
-            max_pixels=max_pixels,
-        )
-    image = image.resize((resized_width, resized_height))
-
-    return image
-
-
-def extract_vision_info(
-    conversations: Union[List[dict], List[List[dict]]]
-) -> List[dict]:
-    vision_infos = []
-    if isinstance(conversations[0], dict):
-        conversations = [conversations]
-    for conversation in conversations:
-        for message in conversation:
-            if isinstance(message["content"], list):
-                for ele in message["content"]:
-                    if (
-                        "image" in ele
-                        or "image_url" in ele
-                        or ele["type"] in ("image", "image_url")
-                    ):
-                        vision_infos.append(ele)
-    return vision_infos
-
-
-def process_vision_info(
-    conversations: Union[List[dict], List[List[dict]]],
-) -> Tuple[
-    Union[List[Image.Image], None, List[Union[paddle.Tensor, List[Image.Image]]], None]
-]:
-    vision_infos = extract_vision_info(conversations)
-    image_inputs = []
-    for vision_info in vision_infos:
-        if "image" in vision_info or "image_url" in vision_info:
-            image_inputs.append(fetch_image(vision_info))
-        else:
-            raise ValueError("image, image_url should in content.")
-    if len(image_inputs) == 0:
-        image_inputs = None
-    return image_inputs
-
-
 class PPDocBeeProcessor(Qwen2VLProcessor):
     """
     PP-DocBee processor, based on Qwen2VLProcessor
     """
 
     @benchmark.timeit
-    def preprocess(self, image: Union[str, Image.Image], query: str):
+    def preprocess(self, input_dicts):
         """
         PreProcess for PP-DocBee Series
         """
-        image_inputs = fetch_image({"image": image})
+        assert (
+            isinstance(input_dicts, list) and len(input_dicts) == 1
+        ), f"PP-DocBee series only supports batchsize of one, but received {len(input_dicts)} samples."
+        input_dict = input_dicts[0]
+        image = input_dict["image"]
+        query = input_dict["query"]
+        image_inputs = fetch_image(
+            image,
+            size_factor=IMAGE_FACTOR,
+            min_pixels=MIN_PIXELS,
+            max_pixels=MAX_PIXELS,
+            max_ratio=MAX_RATIO,
+        )
         image_pad_token = "<|vision_start|><|image_pad|><|vision_end|>"
         text = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{image_pad_token}{query}<|im_end|>\n<|im_start|>assistant\n"
         text = [text]
 
-        rst_inputs = super().__call__(
+        rst_inputs = super().preprocess(
            text=text,
            images=[image_inputs],
            padding=False,
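`smart_resize` and its rounding helpers likewise moved to `processors/common.py` rather than being deleted. A worked example of the math, using the min/max pixel defaults visible in `Qwen2VLProcessor` above; the factor of 28 and `MAX_RATIO = 200` are assumptions for the demo, since those constants are defined outside this diff:

```python
# Worked example of the smart_resize math shown in the removed block above.
# 3136 and 12845056 are the min/max pixel defaults visible in Qwen2VLProcessor;
# factor=28 and max_ratio=200 are assumed demo values.
import math

def round_by_factor(n, f): return round(n / f) * f
def ceil_by_factor(n, f): return math.ceil(n / f) * f
def floor_by_factor(n, f): return math.floor(n / f) * f

def smart_resize(height, width, factor=28, min_pixels=3136,
                 max_pixels=12845056, max_ratio=200):
    if max(height, width) / min(height, width) > max_ratio:
        raise ValueError("aspect ratio too extreme")
    h = max(factor, round_by_factor(height, factor))
    w = max(factor, round_by_factor(width, factor))
    if h * w > max_pixels:   # too many pixels: scale down onto the factor grid
        beta = math.sqrt((height * width) / max_pixels)
        h, w = floor_by_factor(height / beta, factor), floor_by_factor(width / beta, factor)
    elif h * w < min_pixels:  # too few pixels: scale up onto the factor grid
        beta = math.sqrt(min_pixels / (height * width))
        h, w = ceil_by_factor(height * beta, factor), ceil_by_factor(width * beta, factor)
    return h, w

print(smart_resize(1080, 1920))  # (1092, 1932): snapped to multiples of 28
print(smart_resize(10, 10))      # (56, 56): padded up to reach min_pixels
```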
paddlex/inference/models/formula_recognition/predictor.py:

@@ -97,7 +97,13 @@ class FormulaRecPredictor(BasePredictor):
             batch_imgs = self.pre_tfs["UniMERNetImgDecode"](imgs=batch_raw_imgs)
             batch_imgs = self.pre_tfs["UniMERNetTestTransform"](imgs=batch_imgs)
             batch_imgs = self.pre_tfs["UniMERNetImageFormat"](imgs=batch_imgs)
-        elif self.model_name in ("PP-FormulaNet-S", "PP-FormulaNet-L"):
+        elif self.model_name in (
+            "PP-FormulaNet-S",
+            "PP-FormulaNet-L",
+            "PP-FormulaNet_plus-S",
+            "PP-FormulaNet_plus-M",
+            "PP-FormulaNet_plus-L",
+        ):
             batch_imgs = self.pre_tfs["UniMERNetImgDecode"](imgs=batch_raw_imgs)
             batch_imgs = self.pre_tfs["UniMERNetTestTransform"](imgs=batch_imgs)
             batch_imgs = self.pre_tfs["LatexImageFormat"](imgs=batch_imgs)
paddlex/inference/models/formula_recognition/processors.py:

@@ -15,9 +15,7 @@
 
 import json
 import math
-import os
 import re
-import tempfile
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 import numpy as np
@@ -325,14 +323,9 @@ class LaTeXOCRDecode(object):
         **kwargs: Additional keyword arguments for initialization.
         """
         super(LaTeXOCRDecode, self).__init__()
-        temp_path = tempfile.gettempdir()
-        rec_char_dict_path = os.path.join(temp_path, "latexocr_tokenizer.json")
-        try:
-            with open(rec_char_dict_path, "w") as f:
-                json.dump(character_list, f)
-        except Exception as e:
-            print(f"创建 latexocr_tokenizer.json 文件失败, 原因{str(e)}")
-        self.tokenizer = TokenizerFast.from_file(rec_char_dict_path)
+        fast_tokenizer_str = json.dumps(character_list)
+        fast_tokenizer_buffer = fast_tokenizer_str.encode("utf-8")
+        self.tokenizer = TokenizerFast.from_buffer(fast_tokenizer_buffer)
 
     def post_process(self, s: str) -> str:
         """Post-processes the decoded LaTeX string.
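Instead of dumping the character list to a temp file and reading it back with `TokenizerFast.from_file`, 3.0.1 feeds the serialized JSON straight into `from_buffer`. A minimal sketch, assuming `TokenizerFast` is the Rust-backed `tokenizers.Tokenizer` (an assumption; the diff shows only the alias). Building from a buffer also plausibly sidesteps clashes on a shared temp path when several predictors initialize concurrently:

```python
# Minimal sketch of the temp-file-free tokenizer construction, assuming
# TokenizerFast is tokenizers.Tokenizer. from_buffer() accepts the
# serialized tokenizer JSON as bytes, so nothing is written to disk.
import json
from tokenizers import Tokenizer

def build_tokenizer(tokenizer_dict: dict) -> Tokenizer:
    # tokenizer_dict is the parsed content of a tokenizer.json
    buffer = json.dumps(tokenizer_dict).encode("utf-8")
    return Tokenizer.from_buffer(buffer)
```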
@@ -372,7 +365,7 @@ class LaTeXOCRDecode(object):
         dec = [self.tokenizer.decode(tok) for tok in tokens]
         dec_str_list = [
             "".join(detok.split(" "))
-            .replace("
+            .replace("臓", " ")
             .replace("[EOS]", "")
             .replace("[BOS]", "")
             .replace("[PAD]", "")
@@ -631,74 +624,65 @@ class UniMERNetDecode(object):
         self.pad_token_type_id = 0
         self.pad_to_multiple_of = None
 
-        temp_path = tempfile.gettempdir()
-        fast_tokenizer_file = os.path.join(temp_path, "tokenizer.json")
-        tokenizer_config_file = os.path.join(temp_path, "tokenizer_config.json")
-        try:
-            with open(fast_tokenizer_file, "w") as f:
-                json.dump(character_list["fast_tokenizer_file"], f)
-            with open(tokenizer_config_file, "w") as f:
-                json.dump(character_list["tokenizer_config_file"], f)
-        except Exception as e:
-            print(
-                f"创建 tokenizer.json 和 tokenizer_config.json 文件失败, 原因{str(e)}"
-            )
-
-        self.tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
+        fast_tokenizer_str = json.dumps(character_list["fast_tokenizer_file"])
+        fast_tokenizer_buffer = fast_tokenizer_str.encode("utf-8")
+        self.tokenizer = TokenizerFast.from_buffer(fast_tokenizer_buffer)
+        tokenizer_config = (
+            character_list["tokenizer_config_file"]
+            if "tokenizer_config_file" in character_list
+            else None
+        )
         added_tokens_decoder = {}
         added_tokens_map = {}
-        if os.path.exists(tokenizer_config_file):
-            with open(
-                tokenizer_config_file, encoding="utf-8"
-            ) as tokenizer_config_handle:
-                init_kwargs = json.load(tokenizer_config_handle)
-            if "added_tokens_decoder" in init_kwargs:
-                for idx, token in init_kwargs["added_tokens_decoder"].items():
-                    if isinstance(token, dict):
-                        token = AddedToken(**token)
-                    if isinstance(token, AddedToken):
-                        added_tokens_decoder[int(idx)] = token
-                        added_tokens_map[str(token)] = token
-                    else:
-                        raise ValueError(
-                            f"Found a {token.__class__} in the saved `added_tokens_decoder`, should be a dictionary or an AddedToken instance"
-                        )
-        init_kwargs["added_tokens_decoder"] = added_tokens_decoder
-        added_tokens_decoder = init_kwargs.pop("added_tokens_decoder", {})
-        tokens_to_add = [
-            token
-            for index, token in sorted(
-                added_tokens_decoder.items(), key=lambda x: x[0]
-            )
-            if token not in added_tokens_decoder
-        ]
-        added_tokens_encoder = self.added_tokens_encoder(added_tokens_decoder)
-        encoder = list(added_tokens_encoder.keys()) + [
-            str(token) for token in tokens_to_add
-        ]
-        tokens_to_add += [
-            token
-            for token in self.all_special_tokens_extended
-            if token not in encoder and token not in tokens_to_add
-        ]
-        if len(tokens_to_add) > 0:
-            is_last_special = None
-            tokens = []
-            special_tokens = self.all_special_tokens
-            for token in tokens_to_add:
-                is_special = (
-                    (token.special or str(token) in special_tokens)
-                    if isinstance(token, AddedToken)
-                    else str(token) in special_tokens
-                )
-                if is_last_special is None or is_last_special == is_special:
-                    tokens.append(token)
-                else:
-                    self._add_tokens(tokens, special_tokens=is_last_special)
-                    tokens = [token]
-                is_last_special = is_special
-            if len(tokens) > 0:
-                self._add_tokens(tokens, special_tokens=is_last_special)
+        if tokenizer_config is not None:
+            init_kwargs = tokenizer_config
+            if "added_tokens_decoder" in init_kwargs:
+                for idx, token in init_kwargs["added_tokens_decoder"].items():
+                    if isinstance(token, dict):
+                        token = AddedToken(**token)
+                    if isinstance(token, AddedToken):
+                        added_tokens_decoder[int(idx)] = token
+                        added_tokens_map[str(token)] = token
+                    else:
+                        raise ValueError(
+                            f"Found a {token.__class__} in the saved `added_tokens_decoder`, should be a dictionary or an AddedToken instance"
+                        )
+            init_kwargs["added_tokens_decoder"] = added_tokens_decoder
+        added_tokens_decoder = init_kwargs.pop("added_tokens_decoder", {})
+        tokens_to_add = [
+            token
+            for index, token in sorted(
+                added_tokens_decoder.items(), key=lambda x: x[0]
+            )
+            if token not in added_tokens_decoder
+        ]
+        added_tokens_encoder = self.added_tokens_encoder(added_tokens_decoder)
+        encoder = list(added_tokens_encoder.keys()) + [
+            str(token) for token in tokens_to_add
+        ]
+        tokens_to_add += [
+            token
+            for token in self.all_special_tokens_extended
+            if token not in encoder and token not in tokens_to_add
+        ]
+        if len(tokens_to_add) > 0:
+            is_last_special = None
+            tokens = []
+            special_tokens = self.all_special_tokens
+            for token in tokens_to_add:
+                is_special = (
+                    (token.special or str(token) in special_tokens)
+                    if isinstance(token, AddedToken)
+                    else str(token) in special_tokens
+                )
+                if is_last_special is None or is_last_special == is_special:
+                    tokens.append(token)
+                else:
+                    self._add_tokens(tokens, special_tokens=is_last_special)
+                    tokens = [token]
+                    is_last_special = is_special
+            if tokens:
+                self._add_tokens(tokens, special_tokens=is_last_special)
 
     def _add_tokens(
         self, new_tokens: "List[Union[AddedToken, str]]", special_tokens: bool = False
@@ -814,7 +798,7 @@ class UniMERNetDecode(object):
         for i in reversed(range(len(toks[b]))):
             if toks[b][i] is None:
                 toks[b][i] = ""
-            toks[b][i] = toks[b][i].replace("
+            toks[b][i] = toks[b][i].replace("臓", " ").strip()
             if toks[b][i] in (
                 [
                     self.tokenizer.bos_token,
@@ -858,8 +842,27 @@ class UniMERNetDecode(object):
         text_reg = r"(\\(operatorname|mathrm|text|mathbf)\s?\*? {.*?})"
         letter = "[a-zA-Z]"
         noletter = "[\W_^\d]"
-        names = ["".join(x[0].split(" ")) for x in re.findall(text_reg, s)]
-        s = re.sub(text_reg, lambda match: str(names.pop(0)), s)
+        names = []
+        for x in re.findall(text_reg, s):
+            pattern = r"\\[a-zA-Z]+"
+            pattern = r"(\\[a-zA-Z]+)\s(?=\w)|\\[a-zA-Z]+\s(?=})"
+            matches = re.findall(pattern, x[0])
+            for m in matches:
+                if (
+                    m
+                    not in [
+                        "\\operatorname",
+                        "\\mathrm",
+                        "\\text",
+                        "\\mathbf",
+                    ]
+                    and m.strip() != ""
+                ):
+                    s = s.replace(m, m + "XXXXXXX")
+            s = s.replace(" ", "")
+            names.append(s)
+        if len(names) > 0:
+            s = re.sub(text_reg, lambda match: str(names.pop(0)), s)
         news = s
         while True:
             s = news
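The `XXXXXXX` string is a sentinel: spaces that must survive (after a LaTeX command inside `\operatorname`/`\mathrm`/`\text`/`\mathbf` groups) are tagged before all whitespace is stripped, and the sentinel is turned back into a space in `normalize`'s return, visible in the next hunk. A simplified stand-in for the idea, not the exact UniMERNet regexes:

```python
# Sketch of the placeholder trick: protect significant spaces with a
# sentinel, strip all remaining spaces, then restore the protected ones.
import re

SENTINEL = "XXXXXXX"

def strip_spaces_keeping_command_breaks(s: str) -> str:
    # keep the space separating a command like \alpha from a following letter
    s = re.sub(r"(\\[a-zA-Z]+)\s(?=\w)", r"\1" + SENTINEL, s)
    s = s.replace(" ", "")           # drop all remaining spaces
    return s.replace(SENTINEL, " ")  # restore the protected ones

print(strip_spaces_keeping_command_breaks(r"\alpha x + y"))  # "\alpha x+y"
```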
@@ -868,7 +871,16 @@ class UniMERNetDecode(object):
             news = re.sub(r"(%s)\s+?(%s)" % (letter, noletter), r"\1\2", news)
             if news == s:
                 break
-        return s
+        return s.replace("XXXXXXX", " ")
+
+    def remove_chinese_text_wrapping(self, formula):
+        pattern = re.compile(r"\\text\s*{\s*([^}]*?[\u4e00-\u9fff]+[^}]*?)\s*}")
+
+        def replacer(match):
+            return match.group(1)
+
+        replaced_formula = pattern.sub(replacer, formula)
+        return replaced_formula.replace('"', "")
 
     def post_process(self, text: str) -> str:
         """Post-processes a string by fixing text and normalizing it.
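What the new `remove_chinese_text_wrapping` does, on a small input: it unwraps `\text{...}` groups that contain CJK characters (and drops stray double quotes), leaving ASCII-only groups alone:

```python
# Quick check of remove_chinese_text_wrapping's regex behavior.
import re

pattern = re.compile(r"\\text\s*{\s*([^}]*?[\u4e00-\u9fff]+[^}]*?)\s*}")
formula = r"E = mc^2 \text{ 能量守恒 } + \text{rate}"
print(pattern.sub(lambda m: m.group(1), formula))
# E = mc^2 能量守恒 + \text{rate}
```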
@@ -881,6 +893,7 @@ class UniMERNetDecode(object):
         """
         from ftfy import fix_text
 
+        text = self.remove_chinese_text_wrapping(text)
         text = fix_text(text)
         text = self.normalize(text)
         return text
paddlex/inference/models/formula_recognition/result.py:

@@ -15,9 +15,9 @@
 import copy
 import math
 import os
+import re
 import subprocess
 import tempfile
-from pathlib import Path
 from typing import List, Optional
 
 import numpy as np
@@ -32,19 +32,11 @@ from ...common.result import BaseCVResult, JsonMixin
 
 if is_dep_available("opencv-contrib-python"):
     import cv2
-if is_dep_available("PyMuPDF"):
-    import fitz
+if is_dep_available("pypdfium2"):
+    import pypdfium2 as pdfium
 
 
 class FormulaRecResult(BaseCVResult):
-    def _get_input_fn(self):
-        fn = super()._get_input_fn()
-        if (page_idx := self["page_index"]) is not None:
-            fp = Path(fn)
-            stem, suffix = fp.stem, fp.suffix
-            return f"{stem}_{page_idx}{suffix}"
-        else:
-            return fn
 
     def _to_str(self, *args, **kwargs):
         data = copy.deepcopy(self)
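The removed `_get_input_fn` override derived per-page output names from the `page_index` key. The naming scheme it implemented, for reference:

```python
# Naming scheme of the removed override: suffix the stem with the page index.
from pathlib import Path

fn, page_idx = "doc.pdf", 3
fp = Path(fn)
print(f"{fp.stem}_{page_idx}{fp.suffix}")  # doc_3.pdf
```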
@@ -126,6 +118,7 @@ def get_align_equation(equation: str) -> str:
     """
     is_align = False
     equation = str(equation) + "\n"
+
     begin_dict = [
         r"begin{align}",
         r"begin{align*}",
@@ -147,6 +140,17 @@ def get_align_equation(equation: str) -> str:
     return equation
 
 
+def add_text_for_zh_formula(formula: str) -> str:
+    pattern = re.compile(r"([^\x00-\x7F]+)")
+
+    def replacer(match):
+        return f"\\text{{{match.group(1)}}}"
+
+    replaced_formula = pattern.sub(replacer, formula)
+
+    return replaced_formula
+
+
 def generate_tex_file(tex_file_path: str, equation: str) -> None:
     """
     Generates a LaTeX file containing a specific equation.
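`add_text_for_zh_formula` wraps every run of non-ASCII characters in `\text{...}` so that xeCJK (added to the template in the next hunk) can typeset it inside math mode. On a small input:

```python
# What add_text_for_zh_formula does before the .tex file is written.
import re

pattern = re.compile(r"([^\x00-\x7F]+)")
print(pattern.sub(lambda m: f"\\text{{{m.group(1)}}}", r"F = ma 牛顿第二定律"))
# F = ma \text{牛顿第二定律}
```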
@@ -161,17 +165,19 @@ def generate_tex_file(tex_file_path: str, equation: str) -> None:
     """
     with custom_open(tex_file_path, "w") as fp:
         start_template = (
-            r"\documentclass{article}" + "\n"
+            r"\documentclass[varwidth]{standalone}" + "\n"
             r"\usepackage{cite}" + "\n"
             r"\usepackage{amsmath,amssymb,amsfonts,upgreek}" + "\n"
             r"\usepackage{graphicx}" + "\n"
             r"\usepackage{textcomp}" + "\n"
+            r"\usepackage{xeCJK}" + "\n"
             r"\DeclareMathSizes{14}{14}{9.8}{7}" + "\n"
             r"\pagestyle{empty}" + "\n"
             r"\begin{document}" + "\n"
             r"\begin{large}" + "\n"
         )
         fp.write(start_template)
+        equation = add_text_for_zh_formula(equation)
         equation = get_align_equation(equation)
         fp.write(equation)
         end_template = r"\end{large}" + "\n" r"\end{document}" + "\n"
@@ -197,7 +203,7 @@ def generate_pdf_file(
         and None if an error occurred during the pdflatex execution.
     """
     if os.path.exists(tex_path):
-        command = "pdflatex -interaction=nonstopmode -halt-on-error -output-directory={} {}".format(
+        command = "xelatex -interaction=nonstopmode -halt-on-error -output-directory={} {}".format(
             pdf_dir, tex_path
         )
         if is_debug:
@@ -236,7 +242,7 @@ def crop_white_area(image: np.ndarray) -> Optional[List[int]]:
         return None
 
 
-@function_requires_deps("PyMuPDF", "opencv-contrib-python")
+@function_requires_deps("pypdfium2", "opencv-contrib-python")
 def pdf2img(pdf_path: str, img_path: str, is_padding: bool = False):
     """
     Converts a single-page PDF to an image, optionally cropping white areas and adding padding.
@@ -249,21 +255,16 @@ def pdf2img(pdf_path: str, img_path: str, is_padding: bool = False):
     Returns:
         np.ndarray: The resulting image as a NumPy array, or None if the PDF is not single-page.
     """
-
-    pdfDoc = fitz.open(pdf_path)
-    if pdfDoc.page_count != 1:
+    pdfDoc = pdfium.PdfDocument(pdf_path)
+    if len(pdfDoc) != 1:
         return None
-    for pg in range(pdfDoc.page_count):
-        page = pdfDoc[pg]
+    for page in pdfDoc:
         rotate = int(0)
-        zoom_x = 2
-        zoom_y = 2
-        mat = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate)
-        pix = page.get_pixmap(matrix=mat, alpha=False)
-        getpngdata = pix.tobytes(output="png")
-        # decode as np.uint8
-        image_array = np.frombuffer(getpngdata, dtype=np.uint8)
-        img = cv2.imdecode(image_array, cv2.IMREAD_ANYCOLOR)
+        zoom = 2
+        img = page.render(scale=zoom, rotation=rotate).to_pil()
+        img = img.convert("RGB")
+        img = np.array(img)
+        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
     xywh = crop_white_area(img)
 
     if xywh is not None:
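Taken together, the three changes in this file (the `standalone`/xeCJK template, the switch from pdflatex to xelatex, and the move from PyMuPDF to pypdfium2) form the LaTeX-to-image path used to visualize recognized formulas. A condensed sketch of that path under stated assumptions: `xelatex` must be on PATH, `pypdfium2` installed, and `render_formula` plus its template are illustrative names, not the library's API:

```python
# Condensed sketch of the render path after this change: write a standalone
# xeCJK .tex, compile with xelatex, rasterize page 1 with pypdfium2.
import os
import subprocess
import tempfile

import numpy as np
import pypdfium2 as pdfium

TEMPLATE = (
    "\\documentclass[varwidth]{standalone}\n\\usepackage{amsmath}\n"
    "\\usepackage{xeCJK}\n\\begin{document}\n$%s$\n\\end{document}\n"
)

def render_formula(latex: str, scale: int = 2) -> np.ndarray:
    workdir = tempfile.mkdtemp()
    tex_path = os.path.join(workdir, "eq.tex")
    with open(tex_path, "w", encoding="utf-8") as f:
        f.write(TEMPLATE % latex)
    subprocess.run(
        ["xelatex", "-interaction=nonstopmode", "-halt-on-error",
         f"-output-directory={workdir}", tex_path],
        check=True, capture_output=True,
    )
    pdf = pdfium.PdfDocument(os.path.join(workdir, "eq.pdf"))
    img = pdf[0].render(scale=scale).to_pil()
    return np.array(img.convert("RGB"))

# arr = render_formula(r"E = mc^2")  # requires a TeX installation
```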
paddlex/inference/models/image_feature/processors.py:

@@ -23,10 +23,9 @@ class NormalizeFeatures:
 
     def _normalize(self, preds):
         """normalize"""
-        feas_norm = np.sqrt(np.sum(np.square(preds[0]), axis=1, keepdims=True))
-        features = np.divide(preds[0], feas_norm)
+        feas_norm = np.sqrt(np.sum(np.square(preds), axis=1, keepdims=True))
+        features = np.divide(preds, feas_norm)
         return features
 
     def __call__(self, preds):
-        normalized_features = self._normalize(preds)
-        return normalized_features
+        return self._normalize(preds[0])
paddlex/inference/models/object_detection/predictor.py:

@@ -316,6 +316,8 @@ class DetPredictor(BasePredictor):
             "BlazeFace",
             "BlazeFace-FPN-SSH",
             "PP-DocLayout-L",
+            "PP-DocLayout_plus-L",
+            "PP-DocBlockLayout",
         ]
         if any(name in self.model_name for name in models_required_imgsize):
             ordered_required_keys = (
|