paddlex 3.0.3__py3-none-any.whl → 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. paddlex/.version +1 -1
  2. paddlex/configs/modules/text_recognition/eslav_PP-OCRv5_mobile_rec.yaml +39 -0
  3. paddlex/configs/modules/text_recognition/korean_PP-OCRv5_mobile_rec.yaml +39 -0
  4. paddlex/configs/modules/text_recognition/latin_PP-OCRv5_mobile_rec.yaml +39 -0
  5. paddlex/configs/pipelines/PP-DocTranslation.yaml +261 -0
  6. paddlex/inference/common/batch_sampler/__init__.py +1 -0
  7. paddlex/inference/common/batch_sampler/markdown_batch_sampler.py +116 -0
  8. paddlex/inference/common/result/base_cv_result.py +2 -3
  9. paddlex/inference/common/result/mixin.py +3 -1
  10. paddlex/inference/models/common/vlm/generation/utils.py +2 -2
  11. paddlex/inference/models/formula_recognition/result.py +2 -2
  12. paddlex/inference/models/image_classification/result.py +3 -5
  13. paddlex/inference/models/image_multilabel_classification/result.py +2 -2
  14. paddlex/inference/models/object_detection/result.py +2 -2
  15. paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +3 -0
  16. paddlex/inference/models/text_recognition/predictor.py +51 -1
  17. paddlex/inference/models/text_recognition/result.py +5 -2
  18. paddlex/inference/models/video_classification/result.py +3 -3
  19. paddlex/inference/models/video_detection/result.py +2 -4
  20. paddlex/inference/pipelines/__init__.py +1 -0
  21. paddlex/inference/pipelines/attribute_recognition/result.py +2 -2
  22. paddlex/inference/pipelines/components/prompt_engineering/__init__.py +1 -0
  23. paddlex/inference/pipelines/components/prompt_engineering/generate_translate_prompt.py +179 -0
  24. paddlex/inference/pipelines/doc_preprocessor/result.py +2 -2
  25. paddlex/inference/pipelines/formula_recognition/result.py +2 -2
  26. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +2 -0
  27. paddlex/inference/pipelines/layout_parsing/result_v2.py +4 -2
  28. paddlex/inference/pipelines/ocr/pipeline.py +2 -0
  29. paddlex/inference/pipelines/ocr/result.py +11 -7
  30. paddlex/inference/pipelines/pp_doctranslation/__init__.py +15 -0
  31. paddlex/inference/pipelines/pp_doctranslation/pipeline.py +523 -0
  32. paddlex/inference/pipelines/pp_doctranslation/result.py +39 -0
  33. paddlex/inference/pipelines/pp_doctranslation/utils.py +260 -0
  34. paddlex/inference/pipelines/pp_shitu_v2/result.py +2 -2
  35. paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +4 -2
  36. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +5 -1
  37. paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +4 -2
  38. paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +4 -2
  39. paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +4 -2
  40. paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +4 -2
  41. paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +4 -2
  42. paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +4 -2
  43. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +4 -2
  44. paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +4 -2
  45. paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +4 -2
  46. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +4 -2
  47. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +4 -2
  48. paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +4 -2
  49. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -24
  50. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +16 -26
  51. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py +203 -0
  52. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +4 -2
  53. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +4 -2
  54. paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +4 -2
  55. paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +4 -2
  56. paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +4 -2
  57. paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +4 -2
  58. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +4 -2
  59. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -2
  60. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +4 -2
  61. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +4 -2
  62. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +4 -2
  63. paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +4 -2
  64. paddlex/inference/serving/schemas/anomaly_detection.py +1 -0
  65. paddlex/inference/serving/schemas/doc_preprocessor.py +1 -0
  66. paddlex/inference/serving/schemas/face_recognition.py +1 -0
  67. paddlex/inference/serving/schemas/formula_recognition.py +1 -0
  68. paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -0
  69. paddlex/inference/serving/schemas/image_classification.py +1 -0
  70. paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -0
  71. paddlex/inference/serving/schemas/instance_segmentation.py +1 -0
  72. paddlex/inference/serving/schemas/layout_parsing.py +1 -0
  73. paddlex/inference/serving/schemas/object_detection.py +1 -0
  74. paddlex/inference/serving/schemas/ocr.py +1 -0
  75. paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -0
  76. paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -0
  77. paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -0
  78. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +5 -4
  79. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +6 -5
  80. paddlex/inference/serving/schemas/pp_doctranslation.py +115 -0
  81. paddlex/inference/serving/schemas/pp_shituv2.py +1 -0
  82. paddlex/inference/serving/schemas/pp_structurev3.py +2 -9
  83. paddlex/inference/serving/schemas/rotated_object_detection.py +1 -0
  84. paddlex/inference/serving/schemas/seal_recognition.py +1 -0
  85. paddlex/inference/serving/schemas/semantic_segmentation.py +1 -0
  86. paddlex/inference/serving/schemas/shared/ocr.py +8 -1
  87. paddlex/inference/serving/schemas/small_object_detection.py +1 -0
  88. paddlex/inference/serving/schemas/table_recognition.py +1 -0
  89. paddlex/inference/serving/schemas/table_recognition_v2.py +1 -0
  90. paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -0
  91. paddlex/inference/serving/schemas/ts_classification.py +1 -0
  92. paddlex/inference/serving/schemas/ts_forecast.py +1 -0
  93. paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -0
  94. paddlex/inference/utils/io/__init__.py +1 -0
  95. paddlex/inference/utils/io/readers.py +46 -0
  96. paddlex/inference/utils/io/writers.py +2 -0
  97. paddlex/inference/utils/official_models.py +7 -0
  98. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -2
  99. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  100. paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  101. paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  102. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  103. paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  104. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  105. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  106. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  107. paddlex/modules/m_3d_bev_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  108. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  109. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  110. paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  111. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  112. paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  113. paddlex/modules/text_recognition/model_list.py +3 -0
  114. paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  115. paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  116. paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  117. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +27 -0
  118. paddlex/repo_manager/meta.py +3 -3
  119. paddlex/utils/device.py +4 -1
  120. paddlex/utils/{fonts/__init__.py → fonts.py} +45 -26
  121. {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/METADATA +25 -1
  122. {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/RECORD +126 -114
  123. {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/LICENSE +0 -0
  124. {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/WHEEL +0 -0
  125. {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/entry_points.txt +0 -0
  126. {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/top_level.txt +0 -0
@@ -57,6 +57,7 @@ class AnalyzeImagesRequest(ocr.BaseInferRequest):
57
57
  sealDetBoxThresh: Optional[float] = None
58
58
  sealDetUnclipRatio: Optional[float] = None
59
59
  sealRecScoreThresh: Optional[float] = None
60
+ visualize: Optional[bool] = None
60
61
 
61
62
 
62
63
  class LayoutParsingResult(BaseModel):
@@ -78,8 +79,8 @@ BUILD_VECTOR_STORE_ENDPOINT: Final[str] = "/chatocr-vector"
78
79
 
79
80
  class BuildVectorStoreRequest(BaseModel):
80
81
  visualInfo: List[dict]
81
- minCharacters: Optional[int] = None
82
- blockSize: Optional[int] = None
82
+ minCharacters: int = 3500
83
+ blockSize: int = 300
83
84
  retrieverConfig: Optional[dict] = None
84
85
 
85
86
 
@@ -93,9 +94,9 @@ CHAT_ENDPOINT: Final[str] = "/chatocr-chat"
93
94
  class ChatRequest(BaseModel):
94
95
  keyList: List[str]
95
96
  visualInfo: List[dict]
96
- useVectorRetrieval: Optional[bool] = None
97
+ useVectorRetrieval: bool = True
97
98
  vectorInfo: Optional[dict] = None
98
- minCharacters: Optional[int] = None
99
+ minCharacters: int = 3500
99
100
  textTaskDescription: Optional[str] = None
100
101
  textOutputFormat: Optional[str] = None
101
102
  # Is the "Str" in the name unnecessary? Keep the names consistent with the
@@ -61,6 +61,7 @@ class AnalyzeImagesRequest(ocr.BaseInferRequest):
61
61
  sealDetBoxThresh: Optional[float] = None
62
62
  sealDetUnclipRatio: Optional[float] = None
63
63
  sealRecScoreThresh: Optional[float] = None
64
+ visualize: Optional[bool] = None
64
65
 
65
66
 
66
67
  class LayoutParsingResult(BaseModel):
@@ -80,8 +81,8 @@ BUILD_VECTOR_STORE_ENDPOINT: Final[str] = "/chatocr-vector"
80
81
 
81
82
  class BuildVectorStoreRequest(BaseModel):
82
83
  visualInfo: List[dict]
83
- minCharacters: Optional[int] = None
84
- blockSize: Optional[int] = None
84
+ minCharacters: int = 3500
85
+ blockSize: int = 300
85
86
  retrieverConfig: Optional[dict] = None
86
87
 
87
88
 
@@ -108,9 +109,9 @@ CHAT_ENDPOINT: Final[str] = "/chatocr-chat"
108
109
  class ChatRequest(BaseModel):
109
110
  keyList: List[str]
110
111
  visualInfo: List[dict]
111
- useVectorRetrieval: Optional[bool] = None
112
+ useVectorRetrieval: bool = True
112
113
  vectorInfo: Optional[dict] = None
113
- minCharacters: Optional[int] = None
114
+ minCharacters: int = 3500
114
115
  textTaskDescription: Optional[str] = None
115
116
  textOutputFormat: Optional[str] = None
116
117
  textRulesStr: Optional[str] = None
@@ -122,7 +123,7 @@ class ChatRequest(BaseModel):
122
123
  tableFewShotDemoTextContent: Optional[str] = None
123
124
  tableFewShotDemoKeyValueList: Optional[str] = None
124
125
  mllmPredictInfo: Optional[dict] = None
125
- mllmIntegrationStrategy: Optional[str] = None
126
+ mllmIntegrationStrategy: str = "integration"
126
127
  chatBotConfig: Optional[dict] = None
127
128
  retrieverConfig: Optional[dict] = None
128
129
 
@@ -0,0 +1,115 @@
1
+ # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Dict, Final, List, Optional, Tuple, Union
16
+
17
+ from pydantic import BaseModel
18
+
19
+ from ..infra.models import DataInfo, PrimaryOperations
20
+ from .shared import ocr
21
+
22
+ __all__ = [
23
+ "ANALYZE_IMAGES_ENDPOINT",
24
+ "AnalyzeImagesRequest",
25
+ "LayoutParsingResult",
26
+ "AnalyzeImagesResult",
27
+ "TRANSLATE_ENDPOINT",
28
+ "TranslateRequest",
29
+ "TranslationResult",
30
+ "TranslateResult",
31
+ "PRIMARY_OPERATIONS",
32
+ ]
33
+
34
+ ANALYZE_IMAGES_ENDPOINT: Final[str] = "/doctrans-visual"
35
+
36
+
37
+ class AnalyzeImagesRequest(ocr.BaseInferRequest):
38
+ useDocOrientationClassify: Optional[bool] = False
39
+ useDocUnwarping: Optional[bool] = False
40
+ useTextlineOrientation: Optional[bool] = None
41
+ useSealRecognition: Optional[bool] = None
42
+ useTableRecognition: Optional[bool] = None
43
+ useFormulaRecognition: Optional[bool] = None
44
+ useChartRecognition: Optional[bool] = False
45
+ useRegionDetection: Optional[bool] = None
46
+ layoutThreshold: Optional[Union[float, dict]] = None
47
+ layoutNms: Optional[bool] = None
48
+ layoutUnclipRatio: Optional[Union[float, Tuple[float, float], dict]] = None
49
+ layoutMergeBboxesMode: Optional[Union[str, dict]] = None
50
+ textDetLimitSideLen: Optional[int] = None
51
+ textDetLimitType: Optional[str] = None
52
+ textDetThresh: Optional[float] = None
53
+ textDetBoxThresh: Optional[float] = None
54
+ textDetUnclipRatio: Optional[float] = None
55
+ textRecScoreThresh: Optional[float] = None
56
+ sealDetLimitSideLen: Optional[int] = None
57
+ sealDetLimitType: Optional[str] = None
58
+ sealDetThresh: Optional[float] = None
59
+ sealDetBoxThresh: Optional[float] = None
60
+ sealDetUnclipRatio: Optional[float] = None
61
+ sealRecScoreThresh: Optional[float] = None
62
+ useWiredTableCellsTransToHtml: bool = False
63
+ useWirelessTableCellsTransToHtml: bool = False
64
+ useTableOrientationClassify: bool = True
65
+ useOcrResultsWithTableCells: bool = True
66
+ useE2eWiredTableRecModel: bool = False
67
+ useE2eWirelessTableRecModel: bool = True
68
+ visualize: Optional[bool] = None
69
+
70
+
71
+ class LayoutParsingResult(BaseModel):
72
+ prunedResult: dict
73
+ markdown: ocr.MarkdownData
74
+ outputImages: Optional[Dict[str, str]] = None
75
+ inputImage: Optional[str] = None
76
+
77
+
78
+ class AnalyzeImagesResult(BaseModel):
79
+ layoutParsingResults: List[LayoutParsingResult]
80
+ dataInfo: DataInfo
81
+
82
+
83
+ TRANSLATE_ENDPOINT: Final[str] = "/doctrans-translate"
84
+
85
+
86
+ class TranslateRequest(BaseModel):
87
+ markdownList: List[ocr.MarkdownData]
88
+ targetLanguage: str = "zh"
89
+ chunkSize: int = 5000
90
+ taskDescription: Optional[str] = None
91
+ outputFormat: Optional[str] = None
92
+ rulesStr: Optional[str] = None
93
+ fewShotDemoTextContent: Optional[str] = None
94
+ fewShotDemoKeyValueList: Optional[str] = None
95
+ chatBotConfig: Optional[dict] = None
96
+ sleepInterval: float = 0
97
+
98
+
99
+ class TranslationResult(BaseModel):
100
+ language: str
101
+ markdown: ocr.MarkdownData
102
+
103
+
104
+ class TranslateResult(BaseModel):
105
+ translationResults: List[TranslationResult]
106
+
107
+
108
+ PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
109
+ "analyzeImages": (
110
+ ANALYZE_IMAGES_ENDPOINT,
111
+ AnalyzeImagesRequest,
112
+ AnalyzeImagesResult,
113
+ ),
114
+ "translate": (TRANSLATE_ENDPOINT, TranslateRequest, TranslateResult),
115
+ }
@@ -90,6 +90,7 @@ class InferRequest(BaseModel):
90
90
  recThreshold: Optional[float] = None
91
91
  hammingRadius: Optional[float] = None
92
92
  topk: Optional[int] = None
93
+ visualize: Optional[bool] = None
93
94
 
94
95
 
95
96
  class RecResult(BaseModel):
@@ -22,7 +22,6 @@ from .shared import ocr
22
22
  __all__ = [
23
23
  "INFER_ENDPOINT",
24
24
  "InferRequest",
25
- "MarkdownData",
26
25
  "LayoutParsingResult",
27
26
  "InferResult",
28
27
  "PRIMARY_OPERATIONS",
@@ -62,18 +61,12 @@ class InferRequest(ocr.BaseInferRequest):
62
61
  useOcrResultsWithTableCells: bool = True
63
62
  useE2eWiredTableRecModel: bool = False
64
63
  useE2eWirelessTableRecModel: bool = True
65
-
66
-
67
- class MarkdownData(BaseModel):
68
- text: str
69
- images: Dict[str, str]
70
- isStart: bool
71
- isEnd: bool
64
+ visualize: Optional[bool] = None
72
65
 
73
66
 
74
67
  class LayoutParsingResult(BaseModel):
75
68
  prunedResult: dict
76
- markdown: MarkdownData
69
+ markdown: ocr.MarkdownData
77
70
  outputImages: Optional[Dict[str, str]] = None
78
71
  inputImage: Optional[str] = None
79
72
 
@@ -33,6 +33,7 @@ INFER_ENDPOINT: Final[str] = "/rotated-object-detection"
33
33
  class InferRequest(BaseModel):
34
34
  image: str
35
35
  threshold: Optional[Union[float, Dict[int, float]]] = None
36
+ visualize: Optional[bool] = None
36
37
 
37
38
 
38
39
  class DetectedObject(BaseModel):
@@ -44,6 +44,7 @@ class InferRequest(ocr.BaseInferRequest):
44
44
  sealDetBoxThresh: Optional[float] = None
45
45
  sealDetUnclipRatio: Optional[float] = None
46
46
  sealRecScoreThresh: Optional[float] = None
47
+ visualize: Optional[bool] = None
47
48
 
48
49
 
49
50
  class SealRecResult(BaseModel):
@@ -32,6 +32,7 @@ INFER_ENDPOINT: Final[str] = "/semantic-segmentation"
32
32
  class InferRequest(BaseModel):
33
33
  image: str
34
34
  targetSize: Optional[Union[int, image_segmentation.Size]] = None
35
+ visualize: Optional[bool] = None
35
36
 
36
37
 
37
38
  class InferResult(BaseModel):
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Optional
15
+ from typing import Dict, Optional
16
16
 
17
17
  from pydantic import BaseModel
18
18
  from typing_extensions import Literal, TypeAlias
@@ -23,3 +23,10 @@ FileType: TypeAlias = Literal[0, 1]
23
23
  class BaseInferRequest(BaseModel):
24
24
  file: str
25
25
  fileType: Optional[FileType] = None
26
+
27
+
28
+ class MarkdownData(BaseModel):
29
+ text: str
30
+ isStart: bool
31
+ isEnd: bool
32
+ images: Optional[Dict[str, str]] = None
@@ -33,6 +33,7 @@ INFER_ENDPOINT: Final[str] = "/small-object-detection"
33
33
  class InferRequest(BaseModel):
34
34
  image: str
35
35
  threshold: Optional[Union[float, Dict[int, float]]] = None
36
+ visualize: Optional[bool] = None
36
37
 
37
38
 
38
39
  class DetectedObject(BaseModel):
@@ -42,6 +42,7 @@ class InferRequest(ocr.BaseInferRequest):
42
42
  textDetUnclipRatio: Optional[float] = None
43
43
  textRecScoreThresh: Optional[float] = None
44
44
  useOcrResultsWithTableCells: bool = False
45
+ visualize: Optional[bool] = None
45
46
 
46
47
 
47
48
  class TableRecResult(BaseModel):
@@ -47,6 +47,7 @@ class InferRequest(ocr.BaseInferRequest):
47
47
  useWirelessTableCellsTransToHtml: bool = False
48
48
  useTableOrientationClassify: bool = True
49
49
  useOcrResultsWithTableCells: bool = True
50
+ visualize: Optional[bool] = None
50
51
 
51
52
 
52
53
  class TableRecResult(BaseModel):
@@ -25,6 +25,7 @@ INFER_ENDPOINT: Final[str] = "/time-series-anomaly-detection"
25
25
 
26
26
  class InferRequest(BaseModel):
27
27
  csv: str
28
+ visualize: Optional[bool] = None
28
29
 
29
30
 
30
31
  class InferResult(BaseModel):
@@ -25,6 +25,7 @@ INFER_ENDPOINT: Final[str] = "/time-series-classification"
25
25
 
26
26
  class InferRequest(BaseModel):
27
27
  csv: str
28
+ visualize: Optional[bool] = None
28
29
 
29
30
 
30
31
  class InferResult(BaseModel):
@@ -25,6 +25,7 @@ INFER_ENDPOINT: Final[str] = "/time-series-forecasting"
25
25
 
26
26
  class InferRequest(BaseModel):
27
27
  csv: str
28
+ visualize: Optional[bool] = None
28
29
 
29
30
 
30
31
  class InferResult(BaseModel):
@@ -35,6 +35,7 @@ INFER_ENDPOINT: Final[str] = "/vehicle-attribute-recognition"
35
35
  class InferRequest(BaseModel):
36
36
  image: str
37
37
  detThreshold: Optional[float] = None
38
+ visualize: Optional[bool] = None
38
39
  clsThreshold: Optional[
39
40
  Union[float, Dict[Union[Literal["default"], int], float], List[float]]
40
41
  ] = None
@@ -17,6 +17,7 @@ from .readers import (
17
17
  AudioReader,
18
18
  CSVReader,
19
19
  ImageReader,
20
+ MarkDownReader,
20
21
  PDFReader,
21
22
  ReaderType,
22
23
  VideoReader,
@@ -52,6 +52,8 @@ class ReaderType(enum.Enum):
52
52
  TS = 5
53
53
  PDF = 6
54
54
  YAML = 8
55
+ MARKDOWN = 9
56
+ TXT = 10
55
57
 
56
58
 
57
59
  class _BaseReader(object):
@@ -206,6 +208,41 @@ class YAMLReader(_BaseReader):
206
208
  return ReaderType.YAML
207
209
 
208
210
 
211
+ class MarkDownReader(_BaseReader):
212
+
213
+ def __init__(self, backend="Markdown", **bk_args):
214
+ super().__init__(backend, **bk_args)
215
+
216
+ def read(self, in_path):
217
+ return self._backend.read_file(str(in_path))
218
+
219
+ def _init_backend(self, bk_type, bk_args):
220
+ if bk_type == "Markdown":
221
+ return TXTReaderBackend(**bk_args)
222
+ else:
223
+ raise ValueError("Unsupported backend type")
224
+
225
+ def get_type(self):
226
+ return ReaderType.MARKDOWN
227
+
228
+
229
+ class TXTReader(_BaseReader):
230
+ """TXTReader"""
231
+
232
+ def __init__(self, backend="txt", **bk_args):
233
+ super().__init__(backend, **bk_args)
234
+
235
+ def read(self, in_path):
236
+ return self._backend.read_file(str(in_path))
237
+
238
+ def _init_backend(self, bk_type, bk_args):
239
+ if bk_type == "txt":
240
+ return TXTReaderBackend(**bk_args)
241
+
242
+ def get_type(self):
243
+ return ReaderType.TXT
244
+
245
+
209
246
  class _BaseReaderBackend(object):
210
247
  """_BaseReaderBackend"""
211
248
 
@@ -261,6 +298,15 @@ class PDFReaderBackend(_BaseReaderBackend):
261
298
  yield img_cv
262
299
 
263
300
 
301
+ class TXTReaderBackend(_BaseReaderBackend):
302
+ """TXTReaderBackend"""
303
+
304
+ def read_file(self, in_path):
305
+ with open(in_path, "r") as f:
306
+ data = f.read()
307
+ return data
308
+
309
+
264
310
  class _VideoReaderBackend(_BaseReaderBackend):
265
311
  """_VideoReaderBackend"""
266
312
 
@@ -54,6 +54,8 @@ class WriterType(enum.Enum):
54
54
  XLSX = 6
55
55
  CSV = 7
56
56
  YAML = 8
57
+ MARKDOWN = 9
58
+ TXT = 10
57
59
 
58
60
 
59
61
  class _BaseWriter(object):
@@ -359,7 +359,11 @@ PP-LCNet_x1_0_vehicle_attribute_infer.tar",
359
359
  PP-OCRv5_server_rec_infer.tar",
360
360
  "PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/\
361
361
  PP-OCRv5_mobile_rec_infer.tar",
362
+ "eslav_PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/\
363
+ eslav_PP-OCRv5_mobile_rec_infer.tar",
362
364
  "PP-DocBee2-3B": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-DocBee2-3B_infer.tar",
365
+ "latin_PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/latin_PP-OCRv5_mobile_rec_infer.tar",
366
+ "korean_PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/korean_PP-OCRv5_mobile_rec_infer.tar",
363
367
  }
364
368
 
365
369
 
@@ -384,6 +388,7 @@ HUGGINGFACE_MODELS = [
384
388
  "PicoDet-S_layout_17cls",
385
389
  "PicoDet-S_layout_3cls",
386
390
  "PP-DocBee2-3B",
391
+ "PP-Chart2Table",
387
392
  "PP-DocBee-2B",
388
393
  "PP-DocBee-7B",
389
394
  "PP-DocBlockLayout",
@@ -396,8 +401,10 @@ HUGGINGFACE_MODELS = [
396
401
  "PP-FormulaNet_plus-M",
397
402
  "PP-FormulaNet_plus-S",
398
403
  "PP-FormulaNet-S",
404
+ "PP-LCNet_x0_25_textline_ori",
399
405
  "PP-LCNet_x1_0_doc_ori",
400
406
  "PP-LCNet_x1_0_table_cls",
407
+ "PP-LCNet_x1_0_textline_ori",
401
408
  "PP-OCRv3_mobile_det",
402
409
  "PP-OCRv3_mobile_rec",
403
410
  "PP-OCRv3_server_det",
@@ -100,9 +100,9 @@ def convert_labelme_dataset(input_dir):
100
100
  if not os.path.exists(img_path):
101
101
  logging.info("%s is not existed, skip this image" % img_path)
102
102
  continue
103
- img_name = img_path.split("/")[-1]
103
+ img_name = osp.basename(img_path)
104
104
  img_file_list.append(f"images/{img_name}")
105
- label_img_name = annotated_img_path.split("/")[-1]
105
+ label_img_name = osp.basename(annotated_img_path)
106
106
  label_file_list.append(f"annotations/{label_img_name}")
107
107
 
108
108
  img = np.asarray(cv2.imread(img_path))
@@ -16,7 +16,7 @@ import numpy as np
16
16
  import PIL
17
17
  from PIL import ImageDraw, ImageFont
18
18
 
19
- from ......utils.fonts import PINGFANG_FONT_FILE_PATH
19
+ from ......utils.fonts import PINGFANG_FONT
20
20
 
21
21
 
22
22
  def colormap(rgb=False):
@@ -114,7 +114,7 @@ def draw_label(image, label, label_map_dict):
114
114
  min_font_size = int(image_size[0] * 0.02)
115
115
  max_font_size = int(image_size[0] * 0.05)
116
116
  for font_size in range(max_font_size, min_font_size - 1, -1):
117
- font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
117
+ font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
118
118
  if tuple(map(int, PIL.__version__.split("."))) <= (10, 0, 0):
119
119
  text_width_tmp, text_height_tmp = draw.textsize(
120
120
  label_map_dict[int(label)], font
@@ -127,7 +127,7 @@ def draw_label(image, label, label_map_dict):
127
127
  if text_width_tmp <= image_size[0]:
128
128
  break
129
129
  else:
130
- font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, min_font_size)
130
+ font = ImageFont.truetype(PINGFANG_FONT.path, min_font_size)
131
131
  color_list = colormap(rgb=True)
132
132
  color = tuple(color_list[0])
133
133
  font_color = tuple(font_colormap(3))
@@ -22,7 +22,7 @@ import numpy as np
22
22
 
23
23
  from .....utils.deps import function_requires_deps, is_dep_available
24
24
  from .....utils.file_interface import custom_open
25
- from .....utils.fonts import PINGFANG_FONT_FILE_PATH
25
+ from .....utils.fonts import PINGFANG_FONT
26
26
  from .....utils.logging import warning
27
27
 
28
28
  if is_dep_available("opencv-contrib-python"):
@@ -128,7 +128,7 @@ def deep_analyse(dataset_path, output, datatype="FormulaRecDataset"):
128
128
  if os_system == "windows":
129
129
  plt.rcParams["font.sans-serif"] = "FangSong"
130
130
  else:
131
- font = font_manager.FontProperties(fname=PINGFANG_FONT_FILE_PATH, size=15)
131
+ font = font_manager.FontProperties(fname=PINGFANG_FONT.path, size=15)
132
132
 
133
133
  fig, ax = plt.subplots(figsize=(15, 9), dpi=120)
134
134
  xlabel_name = "公式长度区间"
@@ -19,7 +19,7 @@ import numpy as np
19
19
 
20
20
  from .....utils.deps import function_requires_deps, is_dep_available
21
21
  from .....utils.file_interface import custom_open
22
- from .....utils.fonts import PINGFANG_FONT_FILE_PATH
22
+ from .....utils.fonts import PINGFANG_FONT
23
23
 
24
24
  if is_dep_available("matplotlib"):
25
25
  import matplotlib.pyplot as plt
@@ -52,7 +52,7 @@ def deep_analyse(dataset_path, output, dataset_type="ShiTuRec"):
52
52
  if os_system == "windows":
53
53
  plt.rcParams["font.sans-serif"] = "FangSong"
54
54
  else:
55
- font = font_manager.FontProperties(fname=PINGFANG_FONT_FILE_PATH, size=10)
55
+ font = font_manager.FontProperties(fname=PINGFANG_FONT.path, size=10)
56
56
 
57
57
  x = np.arange(len(categories)) # 标签位置
58
58
  width = 0.35 # 每个条形的宽度
@@ -16,7 +16,7 @@ import numpy as np
16
16
  import PIL
17
17
  from PIL import ImageDraw, ImageFont
18
18
 
19
- from ......utils.fonts import PINGFANG_FONT_FILE_PATH
19
+ from ......utils.fonts import PINGFANG_FONT
20
20
 
21
21
 
22
22
  def colormap(rgb=False):
@@ -114,7 +114,7 @@ def draw_label(image, label):
114
114
  min_font_size = int(image_size[0] * 0.02)
115
115
  max_font_size = int(image_size[0] * 0.05)
116
116
  for font_size in range(max_font_size, min_font_size - 1, -1):
117
- font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
117
+ font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
118
118
  if tuple(map(int, PIL.__version__.split("."))) <= (10, 0, 0):
119
119
  text_width_tmp, text_height_tmp = draw.textsize(label, font)
120
120
  else:
@@ -123,7 +123,7 @@ def draw_label(image, label):
123
123
  if text_width_tmp <= image_size[0]:
124
124
  break
125
125
  else:
126
- font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, min_font_size)
126
+ font = ImageFont.truetype(PINGFANG_FONT.path, min_font_size)
127
127
  color_list = colormap(rgb=True)
128
128
  color = tuple(color_list[0])
129
129
  font_color = tuple(font_colormap(3))
@@ -20,7 +20,7 @@ import numpy as np
20
20
 
21
21
  from .....utils.deps import function_requires_deps, is_dep_available
22
22
  from .....utils.file_interface import custom_open
23
- from .....utils.fonts import PINGFANG_FONT_FILE_PATH
23
+ from .....utils.fonts import PINGFANG_FONT
24
24
 
25
25
  if is_dep_available("matplotlib"):
26
26
  import matplotlib.pyplot as plt
@@ -68,7 +68,7 @@ def deep_analyse(dataset_path, output):
68
68
  if os_system == "windows":
69
69
  plt.rcParams["font.sans-serif"] = "FangSong"
70
70
  else:
71
- font = font_manager.FontProperties(fname=PINGFANG_FONT_FILE_PATH, size=10)
71
+ font = font_manager.FontProperties(fname=PINGFANG_FONT.path, size=10)
72
72
  fig, ax = plt.subplots(figsize=(max(8, int(len(classes) / 5)), 5), dpi=300)
73
73
  ax.bar(x, cnts_train_sorted, width=0.5, label="train")
74
74
  ax.bar(x + width, cnts_val_sorted, width=0.5, label="val")
@@ -16,7 +16,7 @@ import numpy as np
16
16
  import PIL
17
17
  from PIL import ImageDraw, ImageFont
18
18
 
19
- from ......utils.fonts import PINGFANG_FONT_FILE_PATH
19
+ from ......utils.fonts import PINGFANG_FONT
20
20
 
21
21
 
22
22
  def colormap(rgb=False):
@@ -114,7 +114,7 @@ def draw_label(image, label, label_map_dict):
114
114
  min_font_size = int(image_size[0] * 0.02)
115
115
  max_font_size = int(image_size[0] * 0.05)
116
116
  for font_size in range(max_font_size, min_font_size - 1, -1):
117
- font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
117
+ font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
118
118
  if tuple(map(int, PIL.__version__.split("."))) <= (10, 0, 0):
119
119
  text_width_tmp, text_height_tmp = draw.textsize(
120
120
  label_map_dict[int(label)], font
@@ -127,7 +127,7 @@ def draw_label(image, label, label_map_dict):
127
127
  if text_width_tmp <= image_size[0]:
128
128
  break
129
129
  else:
130
- font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, min_font_size)
130
+ font = ImageFont.truetype(PINGFANG_FONT.path, min_font_size)
131
131
  color_list = colormap(rgb=True)
132
132
  color = tuple(color_list[0])
133
133
  font_color = tuple(font_colormap(3))
@@ -20,7 +20,7 @@ from collections import defaultdict
20
20
  import numpy as np
21
21
 
22
22
  from .....utils.deps import function_requires_deps, is_dep_available
23
- from .....utils.fonts import PINGFANG_FONT_FILE_PATH
23
+ from .....utils.fonts import PINGFANG_FONT
24
24
 
25
25
  if is_dep_available("matplotlib"):
26
26
  import matplotlib.pyplot as plt
@@ -64,7 +64,7 @@ def deep_analyse(dataset_dir, output):
64
64
  if os_system == "windows":
65
65
  plt.rcParams["font.sans-serif"] = "FangSong"
66
66
  else:
67
- font = font_manager.FontProperties(fname=PINGFANG_FONT_FILE_PATH)
67
+ font = font_manager.FontProperties(fname=PINGFANG_FONT.path)
68
68
  fig, ax = plt.subplots(figsize=(max(8, int(len(classes) / 5)), 5), dpi=120)
69
69
  ax.bar(x, cnts_train_sorted, width=0.5, label="train")
70
70
  ax.bar(x + width, cnts_val_sorted, width=0.5, label="val")
@@ -18,7 +18,7 @@ from PIL import Image, ImageDraw, ImageFont
18
18
 
19
19
  from ......utils import logging
20
20
  from ......utils.deps import function_requires_deps, is_dep_available
21
- from ......utils.fonts import PINGFANG_FONT_FILE_PATH
21
+ from ......utils.fonts import PINGFANG_FONT
22
22
 
23
23
  if is_dep_available("pycocotools"):
24
24
  from pycocotools.coco import COCO
@@ -124,7 +124,7 @@ def draw_bbox(image, coco_info: "COCO", img_id):
124
124
  font_size = int(0.024 * int(image_info["width"])) + 2
125
125
  except:
126
126
  font_size = 12
127
- font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
127
+ font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
128
128
 
129
129
  image = image.convert("RGB")
130
130
  draw = ImageDraw.Draw(image)
@@ -21,7 +21,7 @@ from collections import defaultdict
21
21
  import numpy as np
22
22
 
23
23
  from paddlex.utils.deps import function_requires_deps, is_dep_available
24
- from paddlex.utils.fonts import PINGFANG_FONT_FILE_PATH
24
+ from paddlex.utils.fonts import PINGFANG_FONT
25
25
 
26
26
  if is_dep_available("matplotlib"):
27
27
  import matplotlib.pyplot as plt
@@ -88,7 +88,7 @@ def deep_analyse(dataset_dir, output):
88
88
  if os_system == "windows":
89
89
  plt.rcParams["font.sans-serif"] = "FangSong"
90
90
  else:
91
- font = font_manager.FontProperties(fname=PINGFANG_FONT_FILE_PATH)
91
+ font = font_manager.FontProperties(fname=PINGFANG_FONT.path)
92
92
  fig, ax = plt.subplots(figsize=(max(8, int(len(classes) / 5)), 5), dpi=120)
93
93
  ax.bar(x, cnts_train_sorted, width=0.5, label="train")
94
94
  ax.bar(x + width, cnts_val_sorted, width=0.5, label="val")