paddlex 3.0.2__py3-none-any.whl → 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. paddlex/.version +1 -1
  2. paddlex/configs/modules/text_recognition/eslav_PP-OCRv5_mobile_rec.yaml +39 -0
  3. paddlex/configs/modules/text_recognition/korean_PP-OCRv5_mobile_rec.yaml +39 -0
  4. paddlex/configs/modules/text_recognition/latin_PP-OCRv5_mobile_rec.yaml +39 -0
  5. paddlex/configs/pipelines/PP-DocTranslation.yaml +261 -0
  6. paddlex/inference/common/batch_sampler/__init__.py +1 -0
  7. paddlex/inference/common/batch_sampler/markdown_batch_sampler.py +116 -0
  8. paddlex/inference/common/result/base_cv_result.py +2 -3
  9. paddlex/inference/common/result/mixin.py +3 -1
  10. paddlex/inference/models/base/predictor/base_predictor.py +2 -0
  11. paddlex/inference/models/common/static_infer.py +2 -0
  12. paddlex/inference/models/common/vlm/generation/utils.py +2 -2
  13. paddlex/inference/models/formula_recognition/result.py +2 -2
  14. paddlex/inference/models/image_classification/result.py +3 -5
  15. paddlex/inference/models/image_multilabel_classification/result.py +2 -2
  16. paddlex/inference/models/object_detection/result.py +2 -2
  17. paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +3 -0
  18. paddlex/inference/models/text_recognition/predictor.py +51 -1
  19. paddlex/inference/models/text_recognition/result.py +5 -2
  20. paddlex/inference/models/video_classification/result.py +3 -3
  21. paddlex/inference/models/video_detection/result.py +2 -4
  22. paddlex/inference/pipelines/__init__.py +1 -0
  23. paddlex/inference/pipelines/attribute_recognition/result.py +2 -2
  24. paddlex/inference/pipelines/components/prompt_engineering/__init__.py +1 -0
  25. paddlex/inference/pipelines/components/prompt_engineering/generate_translate_prompt.py +179 -0
  26. paddlex/inference/pipelines/doc_preprocessor/result.py +2 -2
  27. paddlex/inference/pipelines/formula_recognition/result.py +2 -2
  28. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +2 -0
  29. paddlex/inference/pipelines/layout_parsing/result_v2.py +11 -4
  30. paddlex/inference/pipelines/ocr/pipeline.py +2 -0
  31. paddlex/inference/pipelines/ocr/result.py +11 -7
  32. paddlex/inference/pipelines/pp_doctranslation/__init__.py +15 -0
  33. paddlex/inference/pipelines/pp_doctranslation/pipeline.py +523 -0
  34. paddlex/inference/pipelines/pp_doctranslation/result.py +39 -0
  35. paddlex/inference/pipelines/pp_doctranslation/utils.py +260 -0
  36. paddlex/inference/pipelines/pp_shitu_v2/result.py +2 -2
  37. paddlex/inference/serving/basic_serving/_app.py +1 -0
  38. paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +4 -2
  39. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +5 -1
  40. paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +4 -2
  41. paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +4 -2
  42. paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +4 -2
  43. paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +4 -2
  44. paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +4 -2
  45. paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +4 -2
  46. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +4 -2
  47. paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +4 -2
  48. paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +4 -2
  49. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +4 -2
  50. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +4 -2
  51. paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +4 -2
  52. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -24
  53. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +16 -26
  54. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py +203 -0
  55. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +4 -2
  56. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +4 -2
  57. paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +4 -2
  58. paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +4 -2
  59. paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +4 -2
  60. paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +4 -2
  61. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +4 -2
  62. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -2
  63. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +4 -2
  64. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +4 -2
  65. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +4 -2
  66. paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +4 -2
  67. paddlex/inference/serving/infra/utils.py +22 -17
  68. paddlex/inference/serving/schemas/anomaly_detection.py +1 -0
  69. paddlex/inference/serving/schemas/doc_preprocessor.py +1 -0
  70. paddlex/inference/serving/schemas/face_recognition.py +1 -0
  71. paddlex/inference/serving/schemas/formula_recognition.py +1 -0
  72. paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -0
  73. paddlex/inference/serving/schemas/image_classification.py +1 -0
  74. paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -0
  75. paddlex/inference/serving/schemas/instance_segmentation.py +1 -0
  76. paddlex/inference/serving/schemas/layout_parsing.py +1 -0
  77. paddlex/inference/serving/schemas/object_detection.py +1 -0
  78. paddlex/inference/serving/schemas/ocr.py +1 -0
  79. paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -0
  80. paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -0
  81. paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -0
  82. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +5 -4
  83. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +6 -5
  84. paddlex/inference/serving/schemas/pp_doctranslation.py +115 -0
  85. paddlex/inference/serving/schemas/pp_shituv2.py +1 -0
  86. paddlex/inference/serving/schemas/pp_structurev3.py +2 -9
  87. paddlex/inference/serving/schemas/rotated_object_detection.py +1 -0
  88. paddlex/inference/serving/schemas/seal_recognition.py +1 -0
  89. paddlex/inference/serving/schemas/semantic_segmentation.py +1 -0
  90. paddlex/inference/serving/schemas/shared/ocr.py +8 -1
  91. paddlex/inference/serving/schemas/small_object_detection.py +1 -0
  92. paddlex/inference/serving/schemas/table_recognition.py +1 -0
  93. paddlex/inference/serving/schemas/table_recognition_v2.py +1 -0
  94. paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -0
  95. paddlex/inference/serving/schemas/ts_classification.py +1 -0
  96. paddlex/inference/serving/schemas/ts_forecast.py +1 -0
  97. paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -0
  98. paddlex/inference/utils/hpi.py +42 -14
  99. paddlex/inference/utils/hpi_model_info_collection.json +0 -2
  100. paddlex/inference/utils/io/__init__.py +1 -0
  101. paddlex/inference/utils/io/readers.py +46 -0
  102. paddlex/inference/utils/io/writers.py +2 -0
  103. paddlex/inference/utils/official_models.py +7 -0
  104. paddlex/inference/utils/pp_option.py +34 -18
  105. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -2
  106. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  107. paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  108. paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  109. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  110. paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  111. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  112. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  113. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  114. paddlex/modules/m_3d_bev_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  115. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  116. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  117. paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  118. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  119. paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  120. paddlex/modules/text_recognition/model_list.py +3 -0
  121. paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  122. paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  123. paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  124. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +27 -0
  125. paddlex/repo_manager/meta.py +3 -3
  126. paddlex/utils/device.py +4 -1
  127. paddlex/utils/download.py +10 -7
  128. paddlex/utils/{fonts/__init__.py → fonts.py} +45 -26
  129. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/METADATA +25 -1
  130. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/RECORD +134 -122
  131. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/LICENSE +0 -0
  132. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/WHEEL +0 -0
  133. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/entry_points.txt +0 -0
  134. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,523 @@
1
+ # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import re
16
+ from time import sleep
17
+ from typing import Any, Dict, List, Optional, Tuple, Union
18
+
19
+ import numpy as np
20
+
21
+ from ....utils import logging
22
+ from ....utils.deps import pipeline_requires_extra
23
+ from ...common.batch_sampler import MarkDownBatchSampler
24
+ from ...utils.hpi import HPIConfig
25
+ from ...utils.pp_option import PaddlePredictorOption
26
+ from ..base import BasePipeline
27
+ from .result import MarkdownResult
28
+ from .utils import (
29
+ split_original_texts,
30
+ split_text_recursive,
31
+ translate_code_block,
32
+ translate_html_block,
33
+ )
34
+
35
+
36
+ @pipeline_requires_extra("trans")
37
+ class PP_DocTranslation_Pipeline(BasePipeline):
38
+ """
39
+ PP_ DocTranslation_Pipeline
40
+ """
41
+
42
+ entities = ["PP-DocTranslation"]
43
+
44
def __init__(
    self,
    config: Dict,
    device: Optional[str] = None,
    pp_option: Optional[PaddlePredictorOption] = None,
    use_hpip: bool = False,
    hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
    initial_predictor: bool = False,
) -> None:
    """Initializes the PP_DocTranslation_Pipeline.

    Args:
        config (Dict): Configuration dictionary containing various settings.
            ``"pipeline_name"`` is required; ``"use_layout_parser"`` is
            optional and is treated as True when absent.
        device (str, optional): Device to run the predictions on. Defaults to None.
        pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
        use_hpip (bool, optional): Whether to use the high-performance
            inference plugin (HPIP) by default. Defaults to False.
        hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
            The default high-performance inference configuration dictionary.
            Defaults to None.
        initial_predictor (bool, optional): Whether to build the layout
            parsing sub-pipeline and the LLM chat bot immediately. When
            False, they are created on first use by `visual_predict` and
            `translate`. Defaults to False.

    Raises:
        KeyError: If ``config`` has no ``"pipeline_name"`` entry.
    """

    super().__init__(
        device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
    )

    self.pipeline_name = config["pipeline_name"]
    self.config = config
    self.use_layout_parser = config.get("use_layout_parser", True)

    # Declare every lazily-created component up front so that the attribute
    # always exists, even before the predictors have been initialized.
    self.layout_parsing_pipeline = None
    self.chat_bot = None
    self.translate_pe = None

    if initial_predictor:
        self.inintial_visual_predictor(config)
        self.inintial_chat_predictor(config)

    self.markdown_batch_sampler = MarkDownBatchSampler()
83
+
84
def inintial_visual_predictor(self, config: dict) -> None:
    """
    Create the layout parsing sub-pipeline described by ``config``.

    The ``"use_layout_parser"`` entry of ``config`` (True when absent) is
    stored on the instance. When it is truthy, the ``"LayoutParser"`` entry
    under ``"SubPipelines"`` is handed to ``self.create_pipeline`` and the
    result is kept as ``self.layout_parsing_pipeline``; otherwise nothing
    else is done.

    Args:
        config (dict): The pipeline configuration dictionary.
    Returns:
        None
    """
    layout_enabled = config.get("use_layout_parser", True)
    self.use_layout_parser = layout_enabled
    if not layout_enabled:
        return

    # Placeholder passed on when the configuration lacks a LayoutParser entry.
    missing_config = {
        "pipeline_config_error": "config error for layout_parsing_pipeline!"
    }
    sub_pipelines = config.get("SubPipelines", {})
    parser_config = sub_pipelines.get("LayoutParser", missing_config)
    self.layout_parsing_pipeline = self.create_pipeline(parser_config)
103
+
104
def inintial_chat_predictor(self, config: dict) -> None:
    """
    Create the LLM chat bot and the translation prompt builder.

    Both components are read from the ``"SubModules"`` section of ``config``:
    ``"LLM_Chat"`` configures the chat bot stored as ``self.chat_bot``, and
    ``"PromptEngneering" -> "Translate_CommonText"`` configures the prompt
    engineering object stored as ``self.translate_pe``. A placeholder
    error dictionary is passed to the factory when an entry is missing.

    Args:
        config (dict): The pipeline configuration dictionary.
    Returns:
        None
    """
    # Imported inside the method, as in the original code.
    from .. import create_chat_bot

    sub_modules = config.get("SubModules", {})
    llm_config = sub_modules.get(
        "LLM_Chat", {"chat_bot_config_error": "config error for llm chat bot!"}
    )
    self.chat_bot = create_chat_bot(llm_config)

    from .. import create_prompt_engineering

    # NOTE: the key spelling "PromptEngneering" is what the code looks up;
    # it is reproduced exactly.
    prompt_section = config.get("SubModules", {}).get("PromptEngneering", {})
    prompt_config = prompt_section.get(
        "Translate_CommonText",
        {"pe_config_error": "config error for translate_pe_config!"},
    )
    self.translate_pe = create_prompt_engineering(prompt_config)
134
+
135
def predict(self, *args, **kwargs) -> None:
    """Reject direct prediction; this pipeline is driven step by step.

    The call only logs an error and returns None. All positional and
    keyword arguments are ignored.

    Returns:
        None
    """
    # The message names the entry points this class actually defines.
    logging.error(
        "PP-DocTranslation Pipeline does not support calling `predict()` directly! Please invoke `visual_predict` (or `load_from_markdown`) and then `translate` to obtain the result."
    )
    return
140
+
141
def visual_predict(
    self,
    input: Union[str, List[str], np.ndarray, List[np.ndarray]],
    use_doc_orientation_classify: Optional[bool] = False,
    use_doc_unwarping: Optional[bool] = False,
    use_textline_orientation: Optional[bool] = None,
    use_seal_recognition: Optional[bool] = None,
    use_table_recognition: Optional[bool] = None,
    use_formula_recognition: Optional[bool] = None,
    use_chart_recognition: Optional[bool] = False,
    use_region_detection: Optional[bool] = None,
    layout_threshold: Optional[Union[float, dict]] = None,
    layout_nms: Optional[bool] = None,
    layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None,
    layout_merge_bboxes_mode: Optional[str] = None,
    text_det_limit_side_len: Optional[int] = None,
    text_det_limit_type: Optional[str] = None,
    text_det_thresh: Optional[float] = None,
    text_det_box_thresh: Optional[float] = None,
    text_det_unclip_ratio: Optional[float] = None,
    text_rec_score_thresh: Optional[float] = None,
    seal_det_limit_side_len: Optional[int] = None,
    seal_det_limit_type: Optional[str] = None,
    seal_det_thresh: Optional[float] = None,
    seal_det_box_thresh: Optional[float] = None,
    seal_det_unclip_ratio: Optional[float] = None,
    seal_rec_score_thresh: Optional[float] = None,
    use_wired_table_cells_trans_to_html: bool = False,
    use_wireless_table_cells_trans_to_html: bool = False,
    use_table_orientation_classify: bool = True,
    use_ocr_results_with_table_cells: bool = True,
    use_e2e_wired_table_rec_model: bool = False,
    use_e2e_wireless_table_rec_model: bool = True,
    **kwargs,
) -> dict:
    """
    Run the layout parsing sub-pipeline on the input and yield its results.

    This is a generator. Every result produced by the layout parsing
    pipeline is wrapped as ``{"layout_parsing_result": result}`` and yielded.
    All named options below are forwarded unchanged to the layout parsing
    pipeline's ``predict`` call.

    Args:
        input (Union[str, list[str], np.ndarray, list[np.ndarray]]): Input image path, list of image paths,
            numpy array of an image, or list of numpy arrays.
        use_doc_orientation_classify (Optional[bool]): Whether to use document orientation classification.
        use_doc_unwarping (Optional[bool]): Whether to use document unwarping.
        use_textline_orientation (Optional[bool]): Whether to use textline orientation prediction.
        use_seal_recognition (Optional[bool]): Whether to use seal recognition.
        use_table_recognition (Optional[bool]): Whether to use table recognition.
        use_formula_recognition (Optional[bool]): Whether to use formula recognition.
        use_chart_recognition (Optional[bool]): Whether to use chart recognition. Defaults to False.
        use_region_detection (Optional[bool]): Whether to use region detection.
        layout_threshold (Optional[Union[float, dict]]): The threshold value to filter out low-confidence predictions. Default is None.
        layout_nms (bool, optional): Whether to use layout-aware NMS. Defaults to None.
        layout_unclip_ratio (Optional[Union[float, Tuple[float, float], dict]], optional): The ratio of unclipping the bounding box.
            Defaults to None.
            If it's a single number, then both width and height are used.
            If it's a tuple of two numbers, then they are used separately for width and height respectively.
            If it's None, then no unclipping will be performed.
        layout_merge_bboxes_mode (Optional[str], optional): The mode for merging bounding boxes. Defaults to None.
        text_det_limit_side_len (Optional[int]): Maximum side length for text detection.
        text_det_limit_type (Optional[str]): Type of limit to apply for text detection.
        text_det_thresh (Optional[float]): Threshold for text detection.
        text_det_box_thresh (Optional[float]): Threshold for text detection boxes.
        text_det_unclip_ratio (Optional[float]): Ratio for unclipping text detection boxes.
        text_rec_score_thresh (Optional[float]): Score threshold for text recognition.
        seal_det_limit_side_len (Optional[int]): Maximum side length for seal detection.
        seal_det_limit_type (Optional[str]): Type of limit to apply for seal detection.
        seal_det_thresh (Optional[float]): Threshold for seal detection.
        seal_det_box_thresh (Optional[float]): Threshold for seal detection boxes.
        seal_det_unclip_ratio (Optional[float]): Ratio for unclipping seal detection boxes.
        seal_rec_score_thresh (Optional[float]): Score threshold for seal recognition.
        use_wired_table_cells_trans_to_html (bool): Whether to use wired table cells trans to HTML.
        use_wireless_table_cells_trans_to_html (bool): Whether to use wireless table cells trans to HTML.
        use_table_orientation_classify (bool): Whether to use table orientation classification.
        use_ocr_results_with_table_cells (bool): Whether to use OCR results processed by table cells.
        use_e2e_wired_table_rec_model (bool): Whether to use end-to-end wired table recognition model.
        use_e2e_wireless_table_rec_model (bool): Whether to use end-to-end wireless table recognition model.
        **kwargs (Any): Accepted for forward compatibility; not forwarded
            to the layout parsing pipeline.

    Yields:
        dict: ``{"layout_parsing_result": ...}`` for each layout parsing
            result, or a single ``{"error": ...}`` dictionary when the
            layout parser is disabled in the configuration.
    """
    if not self.use_layout_parser:
        logging.error("The models for layout parser are not initialized.")
        yield {"error": "The models for layout parser are not initialized."}
        # Stop here: with the layout parser disabled no sub-pipeline is ever
        # created, so continuing would call `.predict` on None.
        return

    if self.layout_parsing_pipeline is None:
        logging.warning(
            "The layout parsing pipeline is not initialized, will initialize it now."
        )
        self.inintial_visual_predictor(self.config)

    for layout_parsing_result in self.layout_parsing_pipeline.predict(
        input,
        use_doc_orientation_classify=use_doc_orientation_classify,
        use_doc_unwarping=use_doc_unwarping,
        use_textline_orientation=use_textline_orientation,
        use_seal_recognition=use_seal_recognition,
        use_table_recognition=use_table_recognition,
        use_formula_recognition=use_formula_recognition,
        use_chart_recognition=use_chart_recognition,
        use_region_detection=use_region_detection,
        layout_threshold=layout_threshold,
        layout_nms=layout_nms,
        layout_unclip_ratio=layout_unclip_ratio,
        layout_merge_bboxes_mode=layout_merge_bboxes_mode,
        text_det_limit_side_len=text_det_limit_side_len,
        text_det_limit_type=text_det_limit_type,
        text_det_thresh=text_det_thresh,
        text_det_box_thresh=text_det_box_thresh,
        text_det_unclip_ratio=text_det_unclip_ratio,
        text_rec_score_thresh=text_rec_score_thresh,
        seal_det_box_thresh=seal_det_box_thresh,
        seal_det_limit_side_len=seal_det_limit_side_len,
        seal_det_limit_type=seal_det_limit_type,
        seal_det_thresh=seal_det_thresh,
        seal_det_unclip_ratio=seal_det_unclip_ratio,
        seal_rec_score_thresh=seal_rec_score_thresh,
        use_wired_table_cells_trans_to_html=use_wired_table_cells_trans_to_html,
        use_wireless_table_cells_trans_to_html=use_wireless_table_cells_trans_to_html,
        use_table_orientation_classify=use_table_orientation_classify,
        use_ocr_results_with_table_cells=use_ocr_results_with_table_cells,
        use_e2e_wired_table_rec_model=use_e2e_wired_table_rec_model,
        use_e2e_wireless_table_rec_model=use_e2e_wireless_table_rec_model,
    ):
        yield {"layout_parsing_result": layout_parsing_result}
270
+
271
def load_from_markdown(self, input):
    """Load Markdown input and wrap each sample as a ``MarkdownResult``.

    ``input`` is handed to ``self.markdown_batch_sampler.sample``. For every
    sample produced, the first instance and the first input path are taken
    (presumably one document per sample; confirm against the sampler).

    Args:
        input: Whatever the Markdown batch sampler accepts.

    Returns:
        list: One ``MarkdownResult`` per sample, each with ``page_index``
            set to None and ``page_continuation_flags`` set to
            ``(True, True)``.
    """

    def to_result(sample):
        # Read the content first, then the path, as the original code does.
        content = sample.instances[0]
        source_path = sample.input_paths[0]
        return MarkdownResult(
            {
                "input_path": source_path,
                "page_index": None,
                "markdown_texts": content,
                "page_continuation_flags": (True, True),
            }
        )

    return [to_result(sample) for sample in self.markdown_batch_sampler.sample(input)]
285
+
286
def chunk_translate(self, md_blocks, chunk_size, translate_func):
    """
    Translate Markdown blocks, batching short text blocks into shared chunks.

    Short ``"text"`` blocks are accumulated into a pending chunk, joined by
    blank lines, and translated together once the next block would not fit
    or a block of another kind arrives. ``"code"`` blocks, oversized
    ``"text"`` blocks and ``"text_with_html"`` / ``"html"`` blocks are
    delegated to the dedicated helpers, which append to the shared result
    list themselves.

    Args:
        md_blocks (list): ``(block_type, block_content)`` pairs.
        chunk_size (int): The maximum size of each chunk.
        translate_func (callable): Takes a string and returns its translation.

    Returns:
        str: All translated pieces joined with blank lines.

    Raises:
        ValueError: If a block has an unknown type.
    """
    pieces = []
    pending = ""
    total = len(md_blocks)

    def flush(buffer):
        """Translate ``buffer`` when it is not blank; return the buffer to keep."""
        text = buffer.strip()
        if not text:
            return buffer
        pieces.append(translate_func(text))
        return ""

    logging.info(f"Split the original text into {total} blocks")
    logging.info("Starting translation...")

    for position, (kind, content) in enumerate(md_blocks, start=1):
        progress = f"Translating block {position}/{total}..."

        if kind == "code":
            pending = flush(pending)
            logging.info(progress)
            translate_code_block(content, chunk_size, translate_func, pieces)
            continue

        if kind == "text" and len(content) < chunk_size:
            if len(pending) + len(content) < chunk_size:
                pending += "\n\n" + content
                continue
            # The block does not fit: translate what is pending and start a
            # new chunk with the current block.
            if pending.strip():
                logging.info(progress)
                pieces.append(translate_func(pending.strip()))
            pending = content
            continue

        logging.info(progress)
        pending = flush(pending)

        if kind == "text":
            split_text_recursive(content, chunk_size, translate_func, pieces)
        elif kind in ("text_with_html", "html"):
            translate_html_block(content, chunk_size, translate_func, pieces)
        else:
            raise ValueError(f"Unknown block type: {kind}")

    flush(pending)
    return "\n\n".join(pieces)
342
+
343
def translate(
    self,
    ori_md_info_list: List[Dict],
    target_language: str = "zh",
    chunk_size: int = 3000,
    task_description: Optional[str] = None,
    output_format: Optional[str] = None,
    rules_str: Optional[str] = None,
    few_shot_demo_text_content: Optional[str] = None,
    few_shot_demo_key_value_list: Optional[str] = None,
    llm_request_interval: float = 0.0,
    chat_bot_config: Optional[Dict] = None,
    **kwargs,
):
    """
    Translate the given original text into the specified target language using the configured translation model.

    This is a generator: nothing runs until it is iterated. When the first
    entry carries a ``page_index``, all entries are treated as pages of one
    document and merged before translation.

    Args:
        ori_md_info_list (List[Dict]): A list of dictionaries containing information about the original markdown text to be translated.
            An empty list yields nothing.
        target_language (str, optional): The desired target language code. Defaults to "zh".
        chunk_size (int, optional): The maximum number of characters allowed per chunk when splitting long texts.
            The length of the prompt built for an empty text is subtracted from it before splitting. Defaults to 3000.
        task_description (str, optional): A description of the task being performed by the translation model. Defaults to None.
        output_format (str, optional): The desired output format of the translation result. Defaults to None.
        rules_str (str, optional): Rules or guidelines for the translation model to follow. Defaults to None.
        few_shot_demo_text_content (str, optional): Demo text content for the translation model. Defaults to None.
        few_shot_demo_key_value_list (str, optional): Demo text key-value list for the translation model. Defaults to None.
        llm_request_interval (float, optional): The interval in seconds between each request to the LLM. Defaults to 0.0.
        chat_bot_config (Dict, optional): Configuration for the chat bot used in the translation process.
            When given, a chat bot is created from it for this call only. Defaults to None.
        **kwargs: Additional keyword arguments; currently unused.

    Yields:
        MarkdownResult: The translation result in the target language, one per input document.

    Raises:
        ValueError: If ``chunk_size`` is not greater than the base prompt length.
        RuntimeError: If the chat bot returns no content for a request.
    """
    if self.chat_bot is None:
        logging.warning(
            "The LLM chat bot is not initialized,will initialize it now."
        )
        self.inintial_chat_predictor(self.config)

    if chat_bot_config is not None:
        from .. import create_chat_bot

        chat_bot = create_chat_bot(chat_bot_config)
    else:
        chat_bot = self.chat_bot

    # Guard against an empty list before peeking at the first entry.
    if (
        isinstance(ori_md_info_list, list)
        and ori_md_info_list
        and ori_md_info_list[0].get("page_index") is not None
    ):
        # for multi page pdf
        ori_md_info_list = [self.concatenate_markdown_pages(ori_md_info_list)]

    if not isinstance(llm_request_interval, float):
        llm_request_interval = float(llm_request_interval)

    # Prompt settings shared by the base-length probe and every request.
    prompt_options = {
        "language": target_language,
        "task_description": task_description,
        "output_format": output_format,
        "rules_str": rules_str,
        "few_shot_demo_text_content": few_shot_demo_text_content,
        "few_shot_demo_key_value_list": few_shot_demo_key_value_list,
    }

    def translate_func(text):
        """
        Translate the given text using the configured translation model.

        Args:
            text (str): The text to be translated.

        Returns:
            str: The translated text in the target language.
        """
        sleep(llm_request_interval)
        prompt = self.translate_pe.generate_prompt(
            original_text=text, **prompt_options
        )
        translated = chat_bot.generate_chat_results(prompt=prompt).get("content", "")
        if translated is None:
            raise RuntimeError("The call to the large model failed.")
        return translated

    # The prompt scaffolding counts against the budget, so measure it once
    # with an empty text and shrink the chunk size accordingly.
    base_prompt_content = self.translate_pe.generate_prompt(
        original_text="", **prompt_options
    )
    base_prompt_length = len(base_prompt_content)

    if chunk_size <= base_prompt_length:
        raise ValueError(
            f"Chunk size should be greater than the base prompt length ({base_prompt_length}), but got {chunk_size}."
        )
    chunk_size = chunk_size - base_prompt_length

    for ori_md in ori_md_info_list:
        original_texts = ori_md["markdown_texts"]
        md_blocks = split_original_texts(original_texts)
        target_language_texts = self.chunk_translate(
            md_blocks, chunk_size, translate_func
        )

        yield MarkdownResult(
            {
                "language": target_language,
                "input_path": ori_md["input_path"],
                "page_index": ori_md["page_index"],
                "page_continuation_flags": ori_md["page_continuation_flags"],
                "markdown_texts": target_language_texts,
            }
        )
459
+
460
def concatenate_markdown_pages(self, markdown_list: list) -> "MarkdownResult":
    """
    Concatenate Markdown content from multiple pages into a single document.

    A page is glued directly onto the previous one only when the previous
    page did not end a paragraph and the current page does not start one.
    In that case a single space is inserted, unless the character on either
    side of the seam is a CJK ideograph. In every other case the page is
    appended after a blank line, which also applies to the first page.

    Args:
        markdown_list (list): A list containing Markdown data for each page.
            Each entry provides ``"markdown_texts"``, ``"input_path"`` and
            ``"page_continuation_flags"``, a pair of
            (starts-a-paragraph, ends-a-paragraph) flags.

    Returns:
        MarkdownResult: The merged document, with the first page's
            ``input_path``, ``page_index`` set to None and
            ``page_continuation_flags`` set to ``(True, True)``.

    Raises:
        ValueError: If ``markdown_list`` is empty.
    """
    if not markdown_list:
        raise ValueError("The length of markdown_list is zero.")

    # Compiled once instead of on every page seam.
    chinese_char = re.compile(r"[\u4e00-\u9fff]")

    markdown_texts = ""
    previous_page_last_element_paragraph_end_flag = True

    for res in markdown_list:
        # Get the paragraph flags for the current page
        flags = res["page_continuation_flags"]
        page_first_element_paragraph_start_flag: bool = flags[0]
        page_last_element_paragraph_end_flag: bool = flags[1]
        page_text = res["markdown_texts"]

        if (
            page_first_element_paragraph_start_flag
            or previous_page_last_element_paragraph_end_flag
        ):
            # A paragraph boundary falls on the page break.
            markdown_texts += "\n\n" + page_text
        else:
            # The paragraph continues across the page break. Slicing gives
            # "" for empty text, which never matches the pattern.
            last_is_chinese_char = chinese_char.match(markdown_texts[-1:])
            first_is_chinese_char = chinese_char.match(page_text[:1])
            if last_is_chinese_char or first_is_chinese_char:
                markdown_texts += page_text
            else:
                markdown_texts += " " + page_text

        previous_page_last_element_paragraph_end_flag = (
            page_last_element_paragraph_end_flag
        )

    concatenate_result = {
        "input_path": markdown_list[0]["input_path"],
        "page_index": None,
        "page_continuation_flags": (True, True),
        "markdown_texts": markdown_texts,
    }

    return MarkdownResult(concatenate_result)
@@ -0,0 +1,39 @@
1
+ # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from pathlib import Path
16
+
17
+ from ...common.result import BaseCVResult, MarkdownMixin
18
+
19
+
20
class MarkdownResult(BaseCVResult, MarkdownMixin):
    """Result container for Markdown text, optionally tagged with a page index and a language."""

    def __init__(self, data) -> None:
        """Build the result from ``data`` and initialize the Markdown mixin."""
        super().__init__(data)
        MarkdownMixin.__init__(self)

    def _get_input_fn(self):
        """Return the base input file name with optional suffix tags.

        Starting from the name supplied by the parent class, ``_<page_index>``
        and then ``_<language>`` are inserted before the file extension, each
        only when the corresponding entry is present and not None.
        """
        file_name = super()._get_input_fn()
        # Order matters: the page index is appended before the language.
        for key in ("page_index", "language"):
            tag = self.get(key, None)
            if tag is None:
                continue
            path = Path(file_name)
            file_name = f"{path.stem}_{tag}{path.suffix}"
        return file_name

    def _to_markdown(self, pretty=True) -> dict:
        """Return this object itself; ``pretty`` is not used here."""
        return self