PyPI - paddlex - Versions diffs - 3.0.2__py3-none-any.whl → 3.1.0__py3-none-any.whl - Mend

paddlex 3.0.2__py3-none-any.whl → 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (134) hide show

paddlex/.version CHANGED Viewed

	@@ -1 +1 @@
1	- 3.0.2
1	+ 3.1.0

paddlex/configs/modules/text_recognition/eslav_PP-OCRv5_mobile_rec.yaml ADDED Viewed

@@ -0,0 +1,39 @@
+Global:
+  model: eslav_PP-OCRv5_mobile_rec
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/ocr_rec/ocr_rec_dataset_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+Train:
+  epochs_iters: 20
+  batch_size: 8
+  learning_rate: 0.001
+  pretrain_weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/eslav_PP-OCRv5_mobile_rec_pretrained.pdparams
+  resume_path: null
+  log_interval: 20
+  eval_interval: 1
+  save_interval: 1
+Evaluate:
+  weight_path: "output/best_accuracy/best_accuracy.pdparams"
+  log_interval: 1
+Export:
+  weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/eslav_PP-OCRv5_mobile_rec_pretrained.pdparams
+Predict:
+  batch_size: 1
+  model_dir: "output/best_accuracy/inference"
+  input: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_ocr_rec_001.png"
+  kernel_option:
+    run_mode: paddle

paddlex/configs/modules/text_recognition/korean_PP-OCRv5_mobile_rec.yaml ADDED Viewed

@@ -0,0 +1,39 @@
+Global:
+  model: korean_PP-OCRv5_mobile_rec
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/ocr_rec/ocr_rec_dataset_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+Train:
+  epochs_iters: 20
+  batch_size: 8
+  learning_rate: 0.001
+  pretrain_weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/korean_PP-OCRv5_mobile_rec_pretrained.pdparams
+  resume_path: null
+  log_interval: 20
+  eval_interval: 1
+  save_interval: 1
+Evaluate:
+  weight_path: "output/best_accuracy/best_accuracy.pdparams"
+  log_interval: 1
+Export:
+  weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/korean_PP-OCRv5_mobile_rec_pretrained.pdparams
+Predict:
+  batch_size: 1
+  model_dir: "output/best_accuracy/inference"
+  input: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_ocr_rec_003_korean.png"
+  kernel_option:
+    run_mode: paddle

paddlex/configs/modules/text_recognition/latin_PP-OCRv5_mobile_rec.yaml ADDED Viewed

@@ -0,0 +1,39 @@
+Global:
+  model: latin_PP-OCRv5_mobile_rec
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "/paddle/dataset/paddlex/ocr_rec/ocr_rec_dataset_examples"
+  device: gpu:0,1,2,3
+  output: "output"
+CheckDataset:
+  convert:
+    enable: False
+    src_dataset_type: null
+  split:
+    enable: False
+    train_percent: null
+    val_percent: null
+Train:
+  epochs_iters: 20
+  batch_size: 8
+  learning_rate: 0.001
+  pretrain_weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/latin_PP-OCRv5_mobile_rec_pretrained.pdparams
+  resume_path: null
+  log_interval: 20
+  eval_interval: 1
+  save_interval: 1
+Evaluate:
+  weight_path: "output/best_accuracy/best_accuracy.pdparams"
+  log_interval: 1
+Export:
+  weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/latin_PP-OCRv5_mobile_rec_pretrained.pdparams
+Predict:
+  batch_size: 1
+  model_dir: "output/best_accuracy/inference"
+  input: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_ocr_rec_009_latin.png"
+  kernel_option:
+    run_mode: paddle

paddlex/configs/pipelines/PP-DocTranslation.yaml ADDED Viewed

@@ -0,0 +1,261 @@
+pipeline_name: PP-DocTranslation
+use_layout_parser: True
+SubModules:
+  LLM_Chat:
+    module_name: chat_bot
+    model_name: ernie-3.5-8k
+    base_url: "https://qianfan.baidubce.com/v2"
+    api_type: openai
+    api_key: "api_key" # Set this to a real API key
+  PromptEngneering:
+    Translate_CommonText:
+      module_name: prompt_engneering
+      task_type: translate_prompt
+      task_description: '你是一位资深的多语种语言翻译专家，精通多种语言的语法、词汇、文化背景以及语言风格。你的任务是将文本从一种语言准确地转换为另一种语言，同时精准地保留原文的语义、风格和语调，确保翻译内容在目标语言中自然流畅且富有文化适应性。'
+      output_format: '输出应为翻译后的文本，并与原文保持格式一致，包括标点符号和段落结构。如果原文中包含特定的格式（如表格、公式、列表等），翻译后的文本也应保持相同的格式。'
+      rules_str: '通用规则：
+              1. 翻译应确保语义准确完整，并符合目标语言的表达习惯。
+              2. 保留原文的风格和语调，以传达相同的情感和意图。
+              3. 专有名词（如人名、地名、品牌名等）应保持不变，除非它们在目标语言中有公认的翻译。
+              4. 文化特定的表达或成语需根据目标语言的文化背景进行适当的转换或解释。
+              5. 避免使用机器翻译工具的简单直译，需根据上下文进行调整和优化。
+              6. 原文中可能包含的非文本元素（如HTML语法中的图片、表格、公式等）应保持不变。
+              7. 原文中可能包含的代码块，如编程语言代码等，应保持代码块的完整性，不要对代码进行调整。
+              8. 翻译完成后，应仔细校对，确保没有语法和拼写错误。'
+      few_shot_demo_text_content:
+      few_shot_demo_key_value_list:
+SubPipelines:
+  LayoutParser:
+    pipeline_name: PP-StructureV3
+    batch_size: 8
+    use_doc_preprocessor: True
+    use_seal_recognition: True
+    use_table_recognition: True
+    use_formula_recognition: True
+    use_chart_recognition: True
+    use_region_detection: True
+    SubModules:
+      LayoutDetection:
+        module_name: layout_detection
+        model_name: PP-DocLayout_plus-L
+        model_dir: null
+        batch_size: 8
+        threshold:
+          0: 0.3  # paragraph_title
+          1: 0.5  # image
+          2: 0.4  # text
+          3: 0.5  # number
+          4: 0.5  # abstract
+          5: 0.5  # content
+          6: 0.5  # figure_table_chart_title
+          7: 0.3  # formula
+          8: 0.5  # table
+          9: 0.5  # reference
+          10: 0.5 # doc_title
+          11: 0.5 # footnote
+          12: 0.5 # header
+          13: 0.5 # algorithm
+          14: 0.5 # footer
+          15: 0.45 # seal
+          16: 0.5 # chart
+          17: 0.5 # formula_number
+          18: 0.5 # aside_text
+          19: 0.5 # reference_content
+        layout_nms: True
+        layout_unclip_ratio: [1.0, 1.0]
+        layout_merge_bboxes_mode:
+          0: "large"  # paragraph_title
+          1: "large"  # image
+          2: "union"  # text
+          3: "union"  # number
+          4: "union"  # abstract
+          5: "union"  # content
+          6: "union"  # figure_table_chart_title
+          7: "large"  # formula
+          8: "union"  # table
+          9: "union"  # reference
+          10: "union" # doc_title
+          11: "union" # footnote
+          12: "union" # header
+          13: "union" # algorithm
+          14: "union" # footer
+          15: "union" # seal
+          16: "large" # chart
+          17: "union" # formula_number
+          18: "union" # aside_text
+          19: "union" # reference_content
+      ChartRecognition:
+        module_name: chart_recognition
+        model_name: PP-Chart2Table
+        model_dir: null
+        batch_size: 1
+      RegionDetection:
+        module_name: layout_detection
+        model_name: PP-DocBlockLayout
+        model_dir: null
+        layout_nms: True
+        layout_merge_bboxes_mode: "small"
+    SubPipelines:
+      DocPreprocessor:
+        pipeline_name: doc_preprocessor
+        batch_size: 8
+        use_doc_orientation_classify: True
+        use_doc_unwarping: True
+        SubModules:
+          DocOrientationClassify:
+            module_name: doc_text_orientation
+            model_name: PP-LCNet_x1_0_doc_ori
+            model_dir: null
+            batch_size: 8
+          DocUnwarping:
+            module_name: image_unwarping
+            model_name: UVDoc
+            model_dir: null
+      GeneralOCR:
+        pipeline_name: OCR
+        batch_size: 8
+        text_type: general
+        use_doc_preprocessor: False
+        use_textline_orientation: True
+        SubModules:
+          TextDetection:
+            module_name: text_detection
+            model_name: PP-OCRv5_server_det
+            model_dir: null
+            limit_side_len: 736
+            limit_type: min
+            max_side_limit: 4000
+            thresh: 0.3
+            box_thresh: 0.6
+            unclip_ratio: 1.5
+          TextLineOrientation:
+            module_name: textline_orientation
+            model_name: PP-LCNet_x1_0_textline_ori
+            model_dir: null
+            batch_size: 8
+          TextRecognition:
+            module_name: text_recognition
+            model_name: PP-OCRv5_server_rec
+            model_dir: null
+            batch_size: 8
+            score_thresh: 0.0
+      TableRecognition:
+        pipeline_name: table_recognition_v2
+        use_layout_detection: False
+        use_doc_preprocessor: False
+        use_ocr_model: False
+        SubModules:
+          TableClassification:
+            module_name: table_classification
+            model_name: PP-LCNet_x1_0_table_cls
+            model_dir: null
+          WiredTableStructureRecognition:
+            module_name: table_structure_recognition
+            model_name: SLANeXt_wired
+            model_dir: null
+          WirelessTableStructureRecognition:
+            module_name: table_structure_recognition
+            model_name: SLANet_plus
+            model_dir: null
+          WiredTableCellsDetection:
+            module_name: table_cells_detection
+            model_name: RT-DETR-L_wired_table_cell_det
+            model_dir: null
+          WirelessTableCellsDetection:
+            module_name: table_cells_detection
+            model_name: RT-DETR-L_wireless_table_cell_det
+            model_dir: null
+          TableOrientationClassify:
+            module_name: doc_text_orientation
+            model_name: PP-LCNet_x1_0_doc_ori
+            model_dir: null
+        SubPipelines:
+          GeneralOCR:
+            pipeline_name: OCR
+            text_type: general
+            use_doc_preprocessor: False
+            use_textline_orientation: True
+            SubModules:
+              TextDetection:
+                module_name: text_detection
+                model_name: PP-OCRv5_server_det
+                model_dir: null
+                limit_side_len: 736
+                limit_type: min
+                max_side_limit: 4000
+                thresh: 0.3
+                box_thresh: 0.4
+                unclip_ratio: 1.5
+              TextLineOrientation:
+                module_name: textline_orientation
+                model_name: PP-LCNet_x1_0_textline_ori
+                model_dir: null
+                batch_size: 8
+              TextRecognition:
+                module_name: text_recognition
+                model_name: PP-OCRv5_server_rec
+                model_dir: null
+                batch_size: 8
+            score_thresh: 0.0
+      SealRecognition:
+        pipeline_name: seal_recognition
+        batch_size: 8
+        use_layout_detection: False
+        use_doc_preprocessor: False
+        SubPipelines:
+          SealOCR:
+            pipeline_name: OCR
+            batch_size: 8
+            text_type: seal
+            use_doc_preprocessor: False
+            use_textline_orientation: False
+            SubModules:
+              TextDetection:
+                module_name: seal_text_detection
+                model_name: PP-OCRv4_server_seal_det
+                model_dir: null
+                limit_side_len: 736
+                limit_type: min
+                max_side_limit: 4000
+                thresh: 0.2
+                box_thresh: 0.6
+                unclip_ratio: 0.5
+              TextRecognition:
+                module_name: text_recognition
+                model_name: PP-OCRv5_server_rec
+                model_dir: null
+                batch_size: 8
+                score_thresh: 0
+      FormulaRecognition:
+        pipeline_name: formula_recognition
+        batch_size: 8
+        use_layout_detection: False
+        use_doc_preprocessor: False
+        SubModules:
+          FormulaRecognition:
+            module_name: formula_recognition
+            model_name: PP-FormulaNet_plus-L
+            model_dir: null
+            batch_size: 8

paddlex/inference/common/batch_sampler/__init__.py CHANGED Viewed

@@ -17,5 +17,6 @@ from .base_batch_sampler import BaseBatchSampler
 from .det_3d_batch_sampler import Det3DBatchSampler
 from .doc_vlm_batch_sampler import DocVLMBatchSampler
 from .image_batch_sampler import ImageBatchSampler
+from .markdown_batch_sampler import MarkDownBatchSampler
 from .ts_batch_sampler import TSBatchSampler
 from .video_batch_sampler import VideoBatchSampler

paddlex/inference/common/batch_sampler/markdown_batch_sampler.py ADDED Viewed

@@ -0,0 +1,116 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from pathlib import Path
+from ....utils import logging
+from ....utils.cache import CACHE_DIR
+from ....utils.download import download
+from ...utils.io import MarkDownReader
+from .base_batch_sampler import BaseBatchSampler, Batch
+class MarkDownBatchSampler(BaseBatchSampler):
+    """Batch sampler for markdown data, supporting markdown file inputs."""
+    SUFFIX = ["md", "markdown", "mdown", "mkd"]
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.md_reader = MarkDownReader()
+    def _download_from_url(self, in_path: str) -> str:
+        """Download a file from a URL to a cache directory.
+        Args:
+            in_path (str): URL of the file to be downloaded.
+        Returns:
+            str: Path to the downloaded file.
+        """
+        file_name = Path(in_path).name
+        save_path = Path(CACHE_DIR) / "predict_input" / file_name
+        download(in_path, save_path, overwrite=True)
+        return save_path.as_posix()
+    def _get_files_list(self, fp: str) -> list:
+        """Get a list of markdown files from a directory or a single file path.
+        Args:
+            fp (str): Path to a directory or a single markdown file.
+        Returns:
+            list: Sorted list of markdown file paths.
+        Raises:
+            Exception: If no markdown file is found in the path.
+        """
+        file_list = []
+        if fp is None or not os.path.exists(fp):
+            raise Exception(f"Not found any markdown file in path: {fp}")
+        if os.path.isfile(fp) and fp.split(".")[-1] in self.SUFFIX:
+            file_list.append(fp)
+        elif os.path.isdir(fp):
+            for root, dirs, files in os.walk(fp):
+                for single_file in files:
+                    if single_file.split(".")[-1] in self.SUFFIX:
+                        file_list.append(os.path.join(root, single_file))
+        if len(file_list) == 0:
+            raise Exception("Not found any file in {}".format(fp))
+        file_list = sorted(file_list)
+        return file_list
+    def sample(self, inputs: list) -> list:
+        """Generate batches of data from inputs, which can only be file paths.
+        Args:
+            inputs (list): List of markdown file paths.
+        Yields:
+            list: A batch of data which is a list of markdown file paths.
+        """
+        if not isinstance(inputs, list):
+            inputs = [inputs]
+        batch = Batch()
+        for input in inputs:
+            if isinstance(input, str):
+                suffix = input.split(".")[-1].lower()
+                file_path = (
+                    self._download_from_url(input)
+                    if input.startswith("http")
+                    else input
+                )
+                if suffix in self.SUFFIX:
+                    markdown_text = self.md_reader.read(file_path)
+                    batch.append(markdown_text, file_path)
+                    if len(batch) == self.batch_size:
+                        yield batch
+                        batch = Batch()
+                else:
+                    file_list = self._get_files_list(file_path)
+                    for file_path in file_list:
+                        markdown_text = self.md_reader.read(file_path)
+                        batch.append(markdown_text, file_path)
+                        if len(batch) == self.batch_size:
+                            yield batch
+                            batch = Batch()
+            else:
+                logging.warning(
+                    f"Not supported input data type! Only `str` is supported! So has been ignored: {input}."
+                )
+        if len(batch) > 0:
+            yield batch

paddlex/inference/common/result/base_cv_result.py CHANGED Viewed

@@ -36,6 +36,5 @@ class BaseCVResult(BaseResult, ImgMixin):
         if (page_idx := self.get("page_index", None)) is not None:
             fp = Path(fn)
             stem, suffix = fp.stem, fp.suffix
-            return f"{stem}_{page_idx}{suffix}"
-        else:
-            return fn
+            fn = f"{stem}_{page_idx}{suffix}"
+        return fn

paddlex/inference/common/result/mixin.py CHANGED Viewed

@@ -597,6 +597,8 @@ class VideoMixin:
 class MarkdownMixin:
     """Mixin class for adding Markdown handling capabilities."""
+    MARKDOWN_SAVE_KEYS = ["markdown_texts"]
     def __init__(self, *args: list, **kwargs: dict):
         """Initializes the Markdown writer and appends the save_to_markdown method to the save functions.
@@ -696,7 +698,7 @@ class MarkdownMixin:
         if data is None:
             return
         for key, value in data.items():
-            if isinstance(value, str):
+            if key in self.MARKDOWN_SAVE_KEYS:
                 save_mkd_func(save_path.as_posix(), value, *args, **kwargs)
             if isinstance(value, dict):
                 base_save_path = save_path.parent

paddlex/inference/models/base/predictor/base_predictor.py CHANGED Viewed

@@ -337,9 +337,11 @@ class BasePredictor(
             pp_option = PaddlePredictorOption(model_name=self.model_name)
         elif pp_option.model_name is None:
             pp_option.model_name = self.model_name
+            pp_option.reset_run_mode_by_default(model_name=self.model_name)
         if device_info:
             pp_option.device_type = device_info[0]
             pp_option.device_id = device_info[1]
+            pp_option.reset_run_mode_by_default(device_type=device_info[0])
         hpi_info = self.get_hpi_info()
         if hpi_info is not None:
             hpi_info = hpi_info.model_dump(exclude_unset=True)

paddlex/inference/models/common/static_infer.py CHANGED Viewed

@@ -687,6 +687,8 @@ class HPInfer(StaticInfer):
         return PaddleInfer(self._model_dir, self._model_file_prefix, option=pp_option)
     def _build_ui_runtime(self, backend, backend_config, ui_option=None):
+        # TODO: Validate the compatibility of backends with device types
         from ultra_infer import ModelFormat, Runtime, RuntimeOption
         if ui_option is None:

paddlex/inference/models/common/vlm/generation/utils.py CHANGED Viewed

@@ -14,7 +14,7 @@
 import copy
 import inspect
-from typing import Optional, Union
+from typing import List, Optional, Union
 import paddle
 import paddle.distributed as dist
@@ -86,7 +86,7 @@ def get_scale_by_dtype(dtype: str = None, return_positive: bool = True) -> float
 def get_unfinished_flag(
     input_ids: Tensor,
     unfinished_flag: Tensor,
-    eos_token_id: Union[int, list[int], list[list[int]]],
+    eos_token_id: Union[int, List[int], List[List[int]]],
 ) -> Tensor:
     """get unfinished flag for generation step

paddlex/inference/models/formula_recognition/result.py CHANGED Viewed

@@ -27,7 +27,7 @@ from PIL import Image, ImageDraw, ImageFont
 from ....utils import logging
 from ....utils.deps import function_requires_deps, is_dep_available
 from ....utils.file_interface import custom_open
-from ....utils.fonts import PINGFANG_FONT_FILE_PATH
+from ....utils.fonts import PINGFANG_FONT
 from ...common.result import BaseCVResult, JsonMixin
 if is_dep_available("opencv-contrib-python"):
@@ -308,7 +308,7 @@ def draw_formula_module(
             return formula_img
         else:
             img_right_text = draw_box_txt_fine(
-                img_size, box, "Rendering Failed", PINGFANG_FONT_FILE_PATH
+                img_size, box, "Rendering Failed", PINGFANG_FONT.path
             )
         return img_right_text

paddlex/inference/models/image_classification/result.py CHANGED Viewed

@@ -19,7 +19,7 @@ import numpy as np
 import PIL
 from PIL import Image, ImageDraw, ImageFont
-from ....utils.fonts import PINGFANG_FONT_FILE_PATH
+from ....utils.fonts import PINGFANG_FONT
 from ...common.result import BaseCVResult, JsonMixin
 from ...utils.color_map import get_colormap
@@ -47,9 +47,7 @@ class TopkResult(BaseCVResult):
         min_font_size = int(image_size[0] * 0.02)
         max_font_size = int(image_size[0] * 0.05)
         for font_size in range(max_font_size, min_font_size - 1, -1):
-            font = ImageFont.truetype(
-                PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8"
-            )
+            font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
             if tuple(map(int, PIL.__version__.split("."))) <= (10, 0, 0):
                 text_width_tmp, text_height_tmp = draw.textsize(label_str, font)
             else:
@@ -58,7 +56,7 @@ class TopkResult(BaseCVResult):
             if text_width_tmp <= image_size[0]:
                 break
             else:
-                font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, min_font_size)
+                font = ImageFont.truetype(PINGFANG_FONT.path, min_font_size)
         color_list = get_colormap(rgb=True)
         color = tuple(color_list[0])
         font_color = tuple(self._get_font_colormap(3))

paddlex/inference/models/image_multilabel_classification/result.py CHANGED Viewed

@@ -18,7 +18,7 @@ import numpy as np
 import PIL
 from PIL import Image, ImageDraw, ImageFont
-from ....utils.fonts import PINGFANG_FONT_FILE_PATH
+from ....utils.fonts import PINGFANG_FONT
 from ...common.result import BaseCVResult, JsonMixin
 from ...utils.color_map import get_colormap
@@ -43,7 +43,7 @@ class MLClassResult(BaseCVResult):
         image_width, image_height = image.size
         font_size = int(image_width * 0.06)
-        font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size)
+        font = ImageFont.truetype(PINGFANG_FONT.path, font_size)
         text_lines = []
         row_width = 0
         row_height = 0

paddlex/inference/models/object_detection/result.py CHANGED Viewed

@@ -18,7 +18,7 @@ from typing import List
 import PIL
 from PIL import Image, ImageDraw, ImageFont
-from ....utils.fonts import PINGFANG_FONT_FILE_PATH
+from ....utils.fonts import PINGFANG_FONT
 from ...common.result import BaseCVResult, JsonMixin
 from ...utils.color_map import font_colormap, get_colormap
@@ -32,7 +32,7 @@ def draw_box(img: Image.Image, boxes: List[dict]) -> Image.Image:
         img (PIL.Image.Image): visualized image
     """
     font_size = int(0.018 * int(img.width)) + 2
-    font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
+    font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
     draw_thickness = int(max(img.size) * 0.002)
     draw = ImageDraw.Draw(img)

paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py CHANGED Viewed

@@ -199,6 +199,9 @@ class GroundingDINOPostProcessor(object):
         tokenized = self.tokenizer(prompt)
         if posmap.dim() == 1:
             non_zero_idx = posmap.nonzero(as_tuple=True)[0].squeeze(-1).tolist()
+            non_zero_idx = (
+                [non_zero_idx] if not isinstance(non_zero_idx, list) else non_zero_idx
+            )
             token_ids = [tokenized["input_ids"][i] for i in non_zero_idx]
             return self.tokenizer.decode(token_ids)
         else:

paddlex 3.0.2__py3-none-any.whl → 3.1.0__py3-none-any.whl

paddlex 3.0.2__py3-none-any.whl → 3.1.0__py3-none-any.whl