paddlex 3.0.2__py3-none-any.whl → 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. paddlex/.version +1 -1
  2. paddlex/configs/modules/text_recognition/eslav_PP-OCRv5_mobile_rec.yaml +39 -0
  3. paddlex/configs/modules/text_recognition/korean_PP-OCRv5_mobile_rec.yaml +39 -0
  4. paddlex/configs/modules/text_recognition/latin_PP-OCRv5_mobile_rec.yaml +39 -0
  5. paddlex/configs/pipelines/PP-DocTranslation.yaml +261 -0
  6. paddlex/inference/common/batch_sampler/__init__.py +1 -0
  7. paddlex/inference/common/batch_sampler/markdown_batch_sampler.py +116 -0
  8. paddlex/inference/common/result/base_cv_result.py +2 -3
  9. paddlex/inference/common/result/mixin.py +3 -1
  10. paddlex/inference/models/base/predictor/base_predictor.py +2 -0
  11. paddlex/inference/models/common/static_infer.py +2 -0
  12. paddlex/inference/models/common/vlm/generation/utils.py +2 -2
  13. paddlex/inference/models/formula_recognition/result.py +2 -2
  14. paddlex/inference/models/image_classification/result.py +3 -5
  15. paddlex/inference/models/image_multilabel_classification/result.py +2 -2
  16. paddlex/inference/models/object_detection/result.py +2 -2
  17. paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +3 -0
  18. paddlex/inference/models/text_recognition/predictor.py +51 -1
  19. paddlex/inference/models/text_recognition/result.py +5 -2
  20. paddlex/inference/models/video_classification/result.py +3 -3
  21. paddlex/inference/models/video_detection/result.py +2 -4
  22. paddlex/inference/pipelines/__init__.py +1 -0
  23. paddlex/inference/pipelines/attribute_recognition/result.py +2 -2
  24. paddlex/inference/pipelines/components/prompt_engineering/__init__.py +1 -0
  25. paddlex/inference/pipelines/components/prompt_engineering/generate_translate_prompt.py +179 -0
  26. paddlex/inference/pipelines/doc_preprocessor/result.py +2 -2
  27. paddlex/inference/pipelines/formula_recognition/result.py +2 -2
  28. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +2 -0
  29. paddlex/inference/pipelines/layout_parsing/result_v2.py +11 -4
  30. paddlex/inference/pipelines/ocr/pipeline.py +2 -0
  31. paddlex/inference/pipelines/ocr/result.py +11 -7
  32. paddlex/inference/pipelines/pp_doctranslation/__init__.py +15 -0
  33. paddlex/inference/pipelines/pp_doctranslation/pipeline.py +523 -0
  34. paddlex/inference/pipelines/pp_doctranslation/result.py +39 -0
  35. paddlex/inference/pipelines/pp_doctranslation/utils.py +260 -0
  36. paddlex/inference/pipelines/pp_shitu_v2/result.py +2 -2
  37. paddlex/inference/serving/basic_serving/_app.py +1 -0
  38. paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +4 -2
  39. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +5 -1
  40. paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +4 -2
  41. paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +4 -2
  42. paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +4 -2
  43. paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +4 -2
  44. paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +4 -2
  45. paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +4 -2
  46. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +4 -2
  47. paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +4 -2
  48. paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +4 -2
  49. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +4 -2
  50. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +4 -2
  51. paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +4 -2
  52. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -24
  53. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +16 -26
  54. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py +203 -0
  55. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +4 -2
  56. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +4 -2
  57. paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +4 -2
  58. paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +4 -2
  59. paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +4 -2
  60. paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +4 -2
  61. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +4 -2
  62. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -2
  63. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +4 -2
  64. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +4 -2
  65. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +4 -2
  66. paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +4 -2
  67. paddlex/inference/serving/infra/utils.py +22 -17
  68. paddlex/inference/serving/schemas/anomaly_detection.py +1 -0
  69. paddlex/inference/serving/schemas/doc_preprocessor.py +1 -0
  70. paddlex/inference/serving/schemas/face_recognition.py +1 -0
  71. paddlex/inference/serving/schemas/formula_recognition.py +1 -0
  72. paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -0
  73. paddlex/inference/serving/schemas/image_classification.py +1 -0
  74. paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -0
  75. paddlex/inference/serving/schemas/instance_segmentation.py +1 -0
  76. paddlex/inference/serving/schemas/layout_parsing.py +1 -0
  77. paddlex/inference/serving/schemas/object_detection.py +1 -0
  78. paddlex/inference/serving/schemas/ocr.py +1 -0
  79. paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -0
  80. paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -0
  81. paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -0
  82. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +5 -4
  83. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +6 -5
  84. paddlex/inference/serving/schemas/pp_doctranslation.py +115 -0
  85. paddlex/inference/serving/schemas/pp_shituv2.py +1 -0
  86. paddlex/inference/serving/schemas/pp_structurev3.py +2 -9
  87. paddlex/inference/serving/schemas/rotated_object_detection.py +1 -0
  88. paddlex/inference/serving/schemas/seal_recognition.py +1 -0
  89. paddlex/inference/serving/schemas/semantic_segmentation.py +1 -0
  90. paddlex/inference/serving/schemas/shared/ocr.py +8 -1
  91. paddlex/inference/serving/schemas/small_object_detection.py +1 -0
  92. paddlex/inference/serving/schemas/table_recognition.py +1 -0
  93. paddlex/inference/serving/schemas/table_recognition_v2.py +1 -0
  94. paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -0
  95. paddlex/inference/serving/schemas/ts_classification.py +1 -0
  96. paddlex/inference/serving/schemas/ts_forecast.py +1 -0
  97. paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -0
  98. paddlex/inference/utils/hpi.py +42 -14
  99. paddlex/inference/utils/hpi_model_info_collection.json +0 -2
  100. paddlex/inference/utils/io/__init__.py +1 -0
  101. paddlex/inference/utils/io/readers.py +46 -0
  102. paddlex/inference/utils/io/writers.py +2 -0
  103. paddlex/inference/utils/official_models.py +7 -0
  104. paddlex/inference/utils/pp_option.py +34 -18
  105. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -2
  106. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  107. paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  108. paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  109. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  110. paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  111. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  112. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  113. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  114. paddlex/modules/m_3d_bev_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  115. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  116. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  117. paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  118. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  119. paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  120. paddlex/modules/text_recognition/model_list.py +3 -0
  121. paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  122. paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  123. paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  124. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +27 -0
  125. paddlex/repo_manager/meta.py +3 -3
  126. paddlex/utils/device.py +4 -1
  127. paddlex/utils/download.py +10 -7
  128. paddlex/utils/{fonts/__init__.py → fonts.py} +45 -26
  129. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/METADATA +25 -1
  130. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/RECORD +134 -122
  131. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/LICENSE +0 -0
  132. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/WHEEL +0 -0
  133. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/entry_points.txt +0 -0
  134. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,7 @@ INFER_ENDPOINT: Final[str] = "/instance-segmentation"
33
33
  class InferRequest(BaseModel):
34
34
  image: str
35
35
  threshold: Optional[float] = None
36
+ visualize: Optional[bool] = None
36
37
 
37
38
 
38
39
  class Instance(BaseModel):
@@ -53,6 +53,7 @@ class InferRequest(ocr.BaseInferRequest):
53
53
  sealDetBoxThresh: Optional[float] = None
54
54
  sealDetUnclipRatio: Optional[float] = None
55
55
  sealRecScoreThresh: Optional[float] = None
56
+ visualize: Optional[bool] = None
56
57
 
57
58
 
58
59
  class LayoutParsingResult(BaseModel):
@@ -33,6 +33,7 @@ INFER_ENDPOINT: Final[str] = "/object-detection"
33
33
  class InferRequest(BaseModel):
34
34
  image: str
35
35
  threshold: Optional[Union[float, Dict[int, float]]] = None
36
+ visualize: Optional[bool] = None
36
37
 
37
38
 
38
39
  class DetectedObject(BaseModel):
@@ -41,6 +41,7 @@ class InferRequest(ocr.BaseInferRequest):
41
41
  textDetBoxThresh: Optional[float] = None
42
42
  textDetUnclipRatio: Optional[float] = None
43
43
  textRecScoreThresh: Optional[float] = None
44
+ visualize: Optional[bool] = None
44
45
 
45
46
 
46
47
  class OCRResult(BaseModel):
@@ -34,6 +34,7 @@ class InferRequest(BaseModel):
34
34
  image: str
35
35
  prompt: str
36
36
  thresholds: Optional[Dict[str, float]] = None
37
+ visualize: Optional[bool] = None
37
38
 
38
39
 
39
40
  class DetectedObject(BaseModel):
@@ -34,6 +34,7 @@ class InferRequest(BaseModel):
34
34
  image: str
35
35
  prompt: List[List[float]]
36
36
  promptType: str
37
+ visualize: Optional[bool] = None
37
38
 
38
39
 
39
40
  class MaskInfo(BaseModel):
@@ -35,6 +35,7 @@ INFER_ENDPOINT: Final[str] = "/pedestrian-attribute-recognition"
35
35
  class InferRequest(BaseModel):
36
36
  image: str
37
37
  detThreshold: Optional[float] = None
38
+ visualize: Optional[bool] = None
38
39
  clsThreshold: Optional[
39
40
  Union[float, Dict[Union[Literal["default"], int], float], List[float]]
40
41
  ] = None
@@ -57,6 +57,7 @@ class AnalyzeImagesRequest(ocr.BaseInferRequest):
57
57
  sealDetBoxThresh: Optional[float] = None
58
58
  sealDetUnclipRatio: Optional[float] = None
59
59
  sealRecScoreThresh: Optional[float] = None
60
+ visualize: Optional[bool] = None
60
61
 
61
62
 
62
63
  class LayoutParsingResult(BaseModel):
@@ -78,8 +79,8 @@ BUILD_VECTOR_STORE_ENDPOINT: Final[str] = "/chatocr-vector"
78
79
 
79
80
  class BuildVectorStoreRequest(BaseModel):
80
81
  visualInfo: List[dict]
81
- minCharacters: Optional[int] = None
82
- blockSize: Optional[int] = None
82
+ minCharacters: int = 3500
83
+ blockSize: int = 300
83
84
  retrieverConfig: Optional[dict] = None
84
85
 
85
86
 
@@ -93,9 +94,9 @@ CHAT_ENDPOINT: Final[str] = "/chatocr-chat"
93
94
  class ChatRequest(BaseModel):
94
95
  keyList: List[str]
95
96
  visualInfo: List[dict]
96
- useVectorRetrieval: Optional[bool] = None
97
+ useVectorRetrieval: bool = True
97
98
  vectorInfo: Optional[dict] = None
98
- minCharacters: Optional[int] = None
99
+ minCharacters: int = 3500
99
100
  textTaskDescription: Optional[str] = None
100
101
  textOutputFormat: Optional[str] = None
101
102
  # Is the "Str" in the name unnecessary? Keep the names consistent with the
@@ -61,6 +61,7 @@ class AnalyzeImagesRequest(ocr.BaseInferRequest):
61
61
  sealDetBoxThresh: Optional[float] = None
62
62
  sealDetUnclipRatio: Optional[float] = None
63
63
  sealRecScoreThresh: Optional[float] = None
64
+ visualize: Optional[bool] = None
64
65
 
65
66
 
66
67
  class LayoutParsingResult(BaseModel):
@@ -80,8 +81,8 @@ BUILD_VECTOR_STORE_ENDPOINT: Final[str] = "/chatocr-vector"
80
81
 
81
82
  class BuildVectorStoreRequest(BaseModel):
82
83
  visualInfo: List[dict]
83
- minCharacters: Optional[int] = None
84
- blockSize: Optional[int] = None
84
+ minCharacters: int = 3500
85
+ blockSize: int = 300
85
86
  retrieverConfig: Optional[dict] = None
86
87
 
87
88
 
@@ -108,9 +109,9 @@ CHAT_ENDPOINT: Final[str] = "/chatocr-chat"
108
109
  class ChatRequest(BaseModel):
109
110
  keyList: List[str]
110
111
  visualInfo: List[dict]
111
- useVectorRetrieval: Optional[bool] = None
112
+ useVectorRetrieval: bool = True
112
113
  vectorInfo: Optional[dict] = None
113
- minCharacters: Optional[int] = None
114
+ minCharacters: int = 3500
114
115
  textTaskDescription: Optional[str] = None
115
116
  textOutputFormat: Optional[str] = None
116
117
  textRulesStr: Optional[str] = None
@@ -122,7 +123,7 @@ class ChatRequest(BaseModel):
122
123
  tableFewShotDemoTextContent: Optional[str] = None
123
124
  tableFewShotDemoKeyValueList: Optional[str] = None
124
125
  mllmPredictInfo: Optional[dict] = None
125
- mllmIntegrationStrategy: Optional[str] = None
126
+ mllmIntegrationStrategy: str = "integration"
126
127
  chatBotConfig: Optional[dict] = None
127
128
  retrieverConfig: Optional[dict] = None
128
129
 
@@ -0,0 +1,115 @@
1
+ # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Dict, Final, List, Optional, Tuple, Union
16
+
17
+ from pydantic import BaseModel
18
+
19
+ from ..infra.models import DataInfo, PrimaryOperations
20
+ from .shared import ocr
21
+
22
+ __all__ = [
23
+ "ANALYZE_IMAGES_ENDPOINT",
24
+ "AnalyzeImagesRequest",
25
+ "LayoutParsingResult",
26
+ "AnalyzeImagesResult",
27
+ "TRANSLATE_ENDPOINT",
28
+ "TranslateRequest",
29
+ "TranslationResult",
30
+ "TranslateResult",
31
+ "PRIMARY_OPERATIONS",
32
+ ]
33
+
34
+ ANALYZE_IMAGES_ENDPOINT: Final[str] = "/doctrans-visual"
35
+
36
+
37
+ class AnalyzeImagesRequest(ocr.BaseInferRequest):
38
+ useDocOrientationClassify: Optional[bool] = False
39
+ useDocUnwarping: Optional[bool] = False
40
+ useTextlineOrientation: Optional[bool] = None
41
+ useSealRecognition: Optional[bool] = None
42
+ useTableRecognition: Optional[bool] = None
43
+ useFormulaRecognition: Optional[bool] = None
44
+ useChartRecognition: Optional[bool] = False
45
+ useRegionDetection: Optional[bool] = None
46
+ layoutThreshold: Optional[Union[float, dict]] = None
47
+ layoutNms: Optional[bool] = None
48
+ layoutUnclipRatio: Optional[Union[float, Tuple[float, float], dict]] = None
49
+ layoutMergeBboxesMode: Optional[Union[str, dict]] = None
50
+ textDetLimitSideLen: Optional[int] = None
51
+ textDetLimitType: Optional[str] = None
52
+ textDetThresh: Optional[float] = None
53
+ textDetBoxThresh: Optional[float] = None
54
+ textDetUnclipRatio: Optional[float] = None
55
+ textRecScoreThresh: Optional[float] = None
56
+ sealDetLimitSideLen: Optional[int] = None
57
+ sealDetLimitType: Optional[str] = None
58
+ sealDetThresh: Optional[float] = None
59
+ sealDetBoxThresh: Optional[float] = None
60
+ sealDetUnclipRatio: Optional[float] = None
61
+ sealRecScoreThresh: Optional[float] = None
62
+ useWiredTableCellsTransToHtml: bool = False
63
+ useWirelessTableCellsTransToHtml: bool = False
64
+ useTableOrientationClassify: bool = True
65
+ useOcrResultsWithTableCells: bool = True
66
+ useE2eWiredTableRecModel: bool = False
67
+ useE2eWirelessTableRecModel: bool = True
68
+ visualize: Optional[bool] = None
69
+
70
+
71
+ class LayoutParsingResult(BaseModel):
72
+ prunedResult: dict
73
+ markdown: ocr.MarkdownData
74
+ outputImages: Optional[Dict[str, str]] = None
75
+ inputImage: Optional[str] = None
76
+
77
+
78
+ class AnalyzeImagesResult(BaseModel):
79
+ layoutParsingResults: List[LayoutParsingResult]
80
+ dataInfo: DataInfo
81
+
82
+
83
+ TRANSLATE_ENDPOINT: Final[str] = "/doctrans-translate"
84
+
85
+
86
+ class TranslateRequest(BaseModel):
87
+ markdownList: List[ocr.MarkdownData]
88
+ targetLanguage: str = "zh"
89
+ chunkSize: int = 5000
90
+ taskDescription: Optional[str] = None
91
+ outputFormat: Optional[str] = None
92
+ rulesStr: Optional[str] = None
93
+ fewShotDemoTextContent: Optional[str] = None
94
+ fewShotDemoKeyValueList: Optional[str] = None
95
+ chatBotConfig: Optional[dict] = None
96
+ sleepInterval: float = 0
97
+
98
+
99
+ class TranslationResult(BaseModel):
100
+ language: str
101
+ markdown: ocr.MarkdownData
102
+
103
+
104
+ class TranslateResult(BaseModel):
105
+ translationResults: List[TranslationResult]
106
+
107
+
108
+ PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
109
+ "analyzeImages": (
110
+ ANALYZE_IMAGES_ENDPOINT,
111
+ AnalyzeImagesRequest,
112
+ AnalyzeImagesResult,
113
+ ),
114
+ "translate": (TRANSLATE_ENDPOINT, TranslateRequest, TranslateResult),
115
+ }
@@ -90,6 +90,7 @@ class InferRequest(BaseModel):
90
90
  recThreshold: Optional[float] = None
91
91
  hammingRadius: Optional[float] = None
92
92
  topk: Optional[int] = None
93
+ visualize: Optional[bool] = None
93
94
 
94
95
 
95
96
  class RecResult(BaseModel):
@@ -22,7 +22,6 @@ from .shared import ocr
22
22
  __all__ = [
23
23
  "INFER_ENDPOINT",
24
24
  "InferRequest",
25
- "MarkdownData",
26
25
  "LayoutParsingResult",
27
26
  "InferResult",
28
27
  "PRIMARY_OPERATIONS",
@@ -62,18 +61,12 @@ class InferRequest(ocr.BaseInferRequest):
62
61
  useOcrResultsWithTableCells: bool = True
63
62
  useE2eWiredTableRecModel: bool = False
64
63
  useE2eWirelessTableRecModel: bool = True
65
-
66
-
67
- class MarkdownData(BaseModel):
68
- text: str
69
- images: Dict[str, str]
70
- isStart: bool
71
- isEnd: bool
64
+ visualize: Optional[bool] = None
72
65
 
73
66
 
74
67
  class LayoutParsingResult(BaseModel):
75
68
  prunedResult: dict
76
- markdown: MarkdownData
69
+ markdown: ocr.MarkdownData
77
70
  outputImages: Optional[Dict[str, str]] = None
78
71
  inputImage: Optional[str] = None
79
72
 
@@ -33,6 +33,7 @@ INFER_ENDPOINT: Final[str] = "/rotated-object-detection"
33
33
  class InferRequest(BaseModel):
34
34
  image: str
35
35
  threshold: Optional[Union[float, Dict[int, float]]] = None
36
+ visualize: Optional[bool] = None
36
37
 
37
38
 
38
39
  class DetectedObject(BaseModel):
@@ -44,6 +44,7 @@ class InferRequest(ocr.BaseInferRequest):
44
44
  sealDetBoxThresh: Optional[float] = None
45
45
  sealDetUnclipRatio: Optional[float] = None
46
46
  sealRecScoreThresh: Optional[float] = None
47
+ visualize: Optional[bool] = None
47
48
 
48
49
 
49
50
  class SealRecResult(BaseModel):
@@ -32,6 +32,7 @@ INFER_ENDPOINT: Final[str] = "/semantic-segmentation"
32
32
  class InferRequest(BaseModel):
33
33
  image: str
34
34
  targetSize: Optional[Union[int, image_segmentation.Size]] = None
35
+ visualize: Optional[bool] = None
35
36
 
36
37
 
37
38
  class InferResult(BaseModel):
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Optional
15
+ from typing import Dict, Optional
16
16
 
17
17
  from pydantic import BaseModel
18
18
  from typing_extensions import Literal, TypeAlias
@@ -23,3 +23,10 @@ FileType: TypeAlias = Literal[0, 1]
23
23
  class BaseInferRequest(BaseModel):
24
24
  file: str
25
25
  fileType: Optional[FileType] = None
26
+
27
+
28
+ class MarkdownData(BaseModel):
29
+ text: str
30
+ isStart: bool
31
+ isEnd: bool
32
+ images: Optional[Dict[str, str]] = None
@@ -33,6 +33,7 @@ INFER_ENDPOINT: Final[str] = "/small-object-detection"
33
33
  class InferRequest(BaseModel):
34
34
  image: str
35
35
  threshold: Optional[Union[float, Dict[int, float]]] = None
36
+ visualize: Optional[bool] = None
36
37
 
37
38
 
38
39
  class DetectedObject(BaseModel):
@@ -42,6 +42,7 @@ class InferRequest(ocr.BaseInferRequest):
42
42
  textDetUnclipRatio: Optional[float] = None
43
43
  textRecScoreThresh: Optional[float] = None
44
44
  useOcrResultsWithTableCells: bool = False
45
+ visualize: Optional[bool] = None
45
46
 
46
47
 
47
48
  class TableRecResult(BaseModel):
@@ -47,6 +47,7 @@ class InferRequest(ocr.BaseInferRequest):
47
47
  useWirelessTableCellsTransToHtml: bool = False
48
48
  useTableOrientationClassify: bool = True
49
49
  useOcrResultsWithTableCells: bool = True
50
+ visualize: Optional[bool] = None
50
51
 
51
52
 
52
53
  class TableRecResult(BaseModel):
@@ -25,6 +25,7 @@ INFER_ENDPOINT: Final[str] = "/time-series-anomaly-detection"
25
25
 
26
26
  class InferRequest(BaseModel):
27
27
  csv: str
28
+ visualize: Optional[bool] = None
28
29
 
29
30
 
30
31
  class InferResult(BaseModel):
@@ -25,6 +25,7 @@ INFER_ENDPOINT: Final[str] = "/time-series-classification"
25
25
 
26
26
  class InferRequest(BaseModel):
27
27
  csv: str
28
+ visualize: Optional[bool] = None
28
29
 
29
30
 
30
31
  class InferResult(BaseModel):
@@ -25,6 +25,7 @@ INFER_ENDPOINT: Final[str] = "/time-series-forecasting"
25
25
 
26
26
  class InferRequest(BaseModel):
27
27
  csv: str
28
+ visualize: Optional[bool] = None
28
29
 
29
30
 
30
31
  class InferResult(BaseModel):
@@ -35,6 +35,7 @@ INFER_ENDPOINT: Final[str] = "/vehicle-attribute-recognition"
35
35
  class InferRequest(BaseModel):
36
36
  image: str
37
37
  detThreshold: Optional[float] = None
38
+ visualize: Optional[bool] = None
38
39
  clsThreshold: Optional[
39
40
  Union[float, Dict[Union[Literal["default"], int], float], List[float]]
40
41
  ] = None
@@ -132,13 +132,25 @@ def suggest_inference_backend_and_config(
132
132
  available_backends = []
133
133
  if "paddle" in model_paths:
134
134
  available_backends.append("paddle")
135
- if is_built_with_openvino() and is_onnx_model_available:
135
+ if (
136
+ is_built_with_openvino()
137
+ and is_onnx_model_available
138
+ and hpi_config.device_type == "cpu"
139
+ ):
136
140
  available_backends.append("openvino")
137
- if is_built_with_ort() and is_onnx_model_available:
141
+ if (
142
+ is_built_with_ort()
143
+ and is_onnx_model_available
144
+ and hpi_config.device_type in ("cpu", "gpu")
145
+ ):
138
146
  available_backends.append("onnxruntime")
139
- if is_built_with_trt() and is_onnx_model_available:
147
+ if (
148
+ is_built_with_trt()
149
+ and is_onnx_model_available
150
+ and hpi_config.device_type == "gpu"
151
+ ):
140
152
  available_backends.append("tensorrt")
141
- if is_built_with_om() and "om" in model_paths:
153
+ if is_built_with_om() and "om" in model_paths and hpi_config.device_type == "npu":
142
154
  available_backends.append("om")
143
155
 
144
156
  if not available_backends:
@@ -188,20 +200,21 @@ def suggest_inference_backend_and_config(
188
200
  hpi_config.pdx_model_name
189
201
  ].copy()
190
202
 
191
- if not is_mkldnn_available():
192
- if "paddle_mkldnn" in supported_pseudo_backends:
193
- supported_pseudo_backends.remove("paddle_mkldnn")
203
+ if not (is_mkldnn_available() and hpi_config.device_type == "cpu"):
204
+ for pb in supported_pseudo_backends[:]:
205
+ if pb.startswith("paddle_mkldnn"):
206
+ supported_pseudo_backends.remove(pb)
194
207
 
195
208
  # XXX
196
209
  if not (
197
210
  USE_PIR_TRT
198
211
  and importlib.util.find_spec("tensorrt")
199
212
  and ctypes.util.find_library("nvinfer")
213
+ and hpi_config.device_type == "gpu"
200
214
  ):
201
- if "paddle_tensorrt" in supported_pseudo_backends:
202
- supported_pseudo_backends.remove("paddle_tensorrt")
203
- if "paddle_tensorrt_fp16" in supported_pseudo_backends:
204
- supported_pseudo_backends.remove("paddle_tensorrt_fp16")
215
+ for pb in supported_pseudo_backends[:]:
216
+ if pb.startswith("paddle_tensorrt"):
217
+ supported_pseudo_backends.remove(pb)
205
218
 
206
219
  supported_backends = []
207
220
  backend_to_pseudo_backends = defaultdict(list)
@@ -227,12 +240,27 @@ def suggest_inference_backend_and_config(
227
240
  f"{repr(hpi_config.backend)} is not a supported inference backend.",
228
241
  )
229
242
  suggested_backend = hpi_config.backend
230
- pseudo_backends = backend_to_pseudo_backends[suggested_backend]
231
- pseudo_backend = pseudo_backends[0]
232
243
  else:
233
244
  # Prefer the first one.
234
245
  suggested_backend = supported_backends[0]
235
- pseudo_backend = supported_pseudo_backends[0]
246
+
247
+ pseudo_backends = backend_to_pseudo_backends[suggested_backend]
248
+
249
+ if hpi_config.backend_config is not None:
250
+ requested_base_pseudo_backend = None
251
+ if suggested_backend == "paddle":
252
+ if "run_mode" in hpi_config.backend_config:
253
+ if hpi_config.backend_config["run_mode"].startswith("mkldnn"):
254
+ requested_base_pseudo_backend = "paddle_mkldnn"
255
+ elif hpi_config.backend_config["run_mode"].startswith("trt"):
256
+ requested_base_pseudo_backend = "paddle_tensorrt"
257
+ if requested_base_pseudo_backend:
258
+ for pb in pseudo_backends:
259
+ if pb.startswith(requested_base_pseudo_backend):
260
+ break
261
+ else:
262
+ return None, "Unsupported backend configuration."
263
+ pseudo_backend = pseudo_backends[0]
236
264
 
237
265
  suggested_backend_config = {}
238
266
  if suggested_backend == "paddle":
@@ -1992,7 +1992,6 @@
1992
1992
  "onnxruntime"
1993
1993
  ],
1994
1994
  "PP-OCRv4_server_seal_det": [
1995
- "paddle_tensorrt",
1996
1995
  "tensorrt",
1997
1996
  "onnxruntime",
1998
1997
  "paddle"
@@ -2094,7 +2093,6 @@
2094
2093
  "onnxruntime"
2095
2094
  ],
2096
2095
  "PP-OCRv4_server_det": [
2097
- "paddle_tensorrt_fp16",
2098
2096
  "tensorrt",
2099
2097
  "onnxruntime",
2100
2098
  "paddle"
@@ -17,6 +17,7 @@ from .readers import (
17
17
  AudioReader,
18
18
  CSVReader,
19
19
  ImageReader,
20
+ MarkDownReader,
20
21
  PDFReader,
21
22
  ReaderType,
22
23
  VideoReader,
@@ -52,6 +52,8 @@ class ReaderType(enum.Enum):
52
52
  TS = 5
53
53
  PDF = 6
54
54
  YAML = 8
55
+ MARKDOWN = 9
56
+ TXT = 10
55
57
 
56
58
 
57
59
  class _BaseReader(object):
@@ -206,6 +208,41 @@ class YAMLReader(_BaseReader):
206
208
  return ReaderType.YAML
207
209
 
208
210
 
211
+ class MarkDownReader(_BaseReader):
212
+
213
+ def __init__(self, backend="Markdown", **bk_args):
214
+ super().__init__(backend, **bk_args)
215
+
216
+ def read(self, in_path):
217
+ return self._backend.read_file(str(in_path))
218
+
219
+ def _init_backend(self, bk_type, bk_args):
220
+ if bk_type == "Markdown":
221
+ return TXTReaderBackend(**bk_args)
222
+ else:
223
+ raise ValueError("Unsupported backend type")
224
+
225
+ def get_type(self):
226
+ return ReaderType.MARKDOWN
227
+
228
+
229
+ class TXTReader(_BaseReader):
230
+ """TXTReader"""
231
+
232
+ def __init__(self, backend="txt", **bk_args):
233
+ super().__init__(backend, **bk_args)
234
+
235
+ def read(self, in_path):
236
+ return self._backend.read_file(str(in_path))
237
+
238
+ def _init_backend(self, bk_type, bk_args):
239
+ if bk_type == "txt":
240
+ return TXTReaderBackend(**bk_args)
241
+
242
+ def get_type(self):
243
+ return ReaderType.TXT
244
+
245
+
209
246
  class _BaseReaderBackend(object):
210
247
  """_BaseReaderBackend"""
211
248
 
@@ -261,6 +298,15 @@ class PDFReaderBackend(_BaseReaderBackend):
261
298
  yield img_cv
262
299
 
263
300
 
301
+ class TXTReaderBackend(_BaseReaderBackend):
302
+ """TXTReaderBackend"""
303
+
304
+ def read_file(self, in_path):
305
+ with open(in_path, "r") as f:
306
+ data = f.read()
307
+ return data
308
+
309
+
264
310
  class _VideoReaderBackend(_BaseReaderBackend):
265
311
  """_VideoReaderBackend"""
266
312
 
@@ -54,6 +54,8 @@ class WriterType(enum.Enum):
54
54
  XLSX = 6
55
55
  CSV = 7
56
56
  YAML = 8
57
+ MARKDOWN = 9
58
+ TXT = 10
57
59
 
58
60
 
59
61
  class _BaseWriter(object):
@@ -359,7 +359,11 @@ PP-LCNet_x1_0_vehicle_attribute_infer.tar",
359
359
  PP-OCRv5_server_rec_infer.tar",
360
360
  "PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/\
361
361
  PP-OCRv5_mobile_rec_infer.tar",
362
+ "eslav_PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/\
363
+ eslav_PP-OCRv5_mobile_rec_infer.tar",
362
364
  "PP-DocBee2-3B": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-DocBee2-3B_infer.tar",
365
+ "latin_PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/latin_PP-OCRv5_mobile_rec_infer.tar",
366
+ "korean_PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/korean_PP-OCRv5_mobile_rec_infer.tar",
363
367
  }
364
368
 
365
369
 
@@ -384,6 +388,7 @@ HUGGINGFACE_MODELS = [
384
388
  "PicoDet-S_layout_17cls",
385
389
  "PicoDet-S_layout_3cls",
386
390
  "PP-DocBee2-3B",
391
+ "PP-Chart2Table",
387
392
  "PP-DocBee-2B",
388
393
  "PP-DocBee-7B",
389
394
  "PP-DocBlockLayout",
@@ -396,8 +401,10 @@ HUGGINGFACE_MODELS = [
396
401
  "PP-FormulaNet_plus-M",
397
402
  "PP-FormulaNet_plus-S",
398
403
  "PP-FormulaNet-S",
404
+ "PP-LCNet_x0_25_textline_ori",
399
405
  "PP-LCNet_x1_0_doc_ori",
400
406
  "PP-LCNet_x1_0_table_cls",
407
+ "PP-LCNet_x1_0_textline_ori",
401
408
  "PP-OCRv3_mobile_det",
402
409
  "PP-OCRv3_mobile_rec",
403
410
  "PP-OCRv3_server_det",