paddlex 3.0.2__py3-none-any.whl → 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. paddlex/.version +1 -1
  2. paddlex/configs/modules/text_recognition/eslav_PP-OCRv5_mobile_rec.yaml +39 -0
  3. paddlex/configs/modules/text_recognition/korean_PP-OCRv5_mobile_rec.yaml +39 -0
  4. paddlex/configs/modules/text_recognition/latin_PP-OCRv5_mobile_rec.yaml +39 -0
  5. paddlex/configs/pipelines/PP-DocTranslation.yaml +261 -0
  6. paddlex/inference/common/batch_sampler/__init__.py +1 -0
  7. paddlex/inference/common/batch_sampler/markdown_batch_sampler.py +116 -0
  8. paddlex/inference/common/result/base_cv_result.py +2 -3
  9. paddlex/inference/common/result/mixin.py +3 -1
  10. paddlex/inference/models/base/predictor/base_predictor.py +2 -0
  11. paddlex/inference/models/common/static_infer.py +2 -0
  12. paddlex/inference/models/common/vlm/generation/utils.py +2 -2
  13. paddlex/inference/models/formula_recognition/result.py +2 -2
  14. paddlex/inference/models/image_classification/result.py +3 -5
  15. paddlex/inference/models/image_multilabel_classification/result.py +2 -2
  16. paddlex/inference/models/object_detection/result.py +2 -2
  17. paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +3 -0
  18. paddlex/inference/models/text_recognition/predictor.py +51 -1
  19. paddlex/inference/models/text_recognition/result.py +5 -2
  20. paddlex/inference/models/video_classification/result.py +3 -3
  21. paddlex/inference/models/video_detection/result.py +2 -4
  22. paddlex/inference/pipelines/__init__.py +1 -0
  23. paddlex/inference/pipelines/attribute_recognition/result.py +2 -2
  24. paddlex/inference/pipelines/components/prompt_engineering/__init__.py +1 -0
  25. paddlex/inference/pipelines/components/prompt_engineering/generate_translate_prompt.py +179 -0
  26. paddlex/inference/pipelines/doc_preprocessor/result.py +2 -2
  27. paddlex/inference/pipelines/formula_recognition/result.py +2 -2
  28. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +2 -0
  29. paddlex/inference/pipelines/layout_parsing/result_v2.py +11 -4
  30. paddlex/inference/pipelines/ocr/pipeline.py +2 -0
  31. paddlex/inference/pipelines/ocr/result.py +11 -7
  32. paddlex/inference/pipelines/pp_doctranslation/__init__.py +15 -0
  33. paddlex/inference/pipelines/pp_doctranslation/pipeline.py +523 -0
  34. paddlex/inference/pipelines/pp_doctranslation/result.py +39 -0
  35. paddlex/inference/pipelines/pp_doctranslation/utils.py +260 -0
  36. paddlex/inference/pipelines/pp_shitu_v2/result.py +2 -2
  37. paddlex/inference/serving/basic_serving/_app.py +1 -0
  38. paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +4 -2
  39. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +5 -1
  40. paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +4 -2
  41. paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +4 -2
  42. paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +4 -2
  43. paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +4 -2
  44. paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +4 -2
  45. paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +4 -2
  46. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +4 -2
  47. paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +4 -2
  48. paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +4 -2
  49. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +4 -2
  50. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +4 -2
  51. paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +4 -2
  52. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -24
  53. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +16 -26
  54. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py +203 -0
  55. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +4 -2
  56. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +4 -2
  57. paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +4 -2
  58. paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +4 -2
  59. paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +4 -2
  60. paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +4 -2
  61. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +4 -2
  62. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -2
  63. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +4 -2
  64. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +4 -2
  65. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +4 -2
  66. paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +4 -2
  67. paddlex/inference/serving/infra/utils.py +22 -17
  68. paddlex/inference/serving/schemas/anomaly_detection.py +1 -0
  69. paddlex/inference/serving/schemas/doc_preprocessor.py +1 -0
  70. paddlex/inference/serving/schemas/face_recognition.py +1 -0
  71. paddlex/inference/serving/schemas/formula_recognition.py +1 -0
  72. paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -0
  73. paddlex/inference/serving/schemas/image_classification.py +1 -0
  74. paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -0
  75. paddlex/inference/serving/schemas/instance_segmentation.py +1 -0
  76. paddlex/inference/serving/schemas/layout_parsing.py +1 -0
  77. paddlex/inference/serving/schemas/object_detection.py +1 -0
  78. paddlex/inference/serving/schemas/ocr.py +1 -0
  79. paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -0
  80. paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -0
  81. paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -0
  82. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +5 -4
  83. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +6 -5
  84. paddlex/inference/serving/schemas/pp_doctranslation.py +115 -0
  85. paddlex/inference/serving/schemas/pp_shituv2.py +1 -0
  86. paddlex/inference/serving/schemas/pp_structurev3.py +2 -9
  87. paddlex/inference/serving/schemas/rotated_object_detection.py +1 -0
  88. paddlex/inference/serving/schemas/seal_recognition.py +1 -0
  89. paddlex/inference/serving/schemas/semantic_segmentation.py +1 -0
  90. paddlex/inference/serving/schemas/shared/ocr.py +8 -1
  91. paddlex/inference/serving/schemas/small_object_detection.py +1 -0
  92. paddlex/inference/serving/schemas/table_recognition.py +1 -0
  93. paddlex/inference/serving/schemas/table_recognition_v2.py +1 -0
  94. paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -0
  95. paddlex/inference/serving/schemas/ts_classification.py +1 -0
  96. paddlex/inference/serving/schemas/ts_forecast.py +1 -0
  97. paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -0
  98. paddlex/inference/utils/hpi.py +42 -14
  99. paddlex/inference/utils/hpi_model_info_collection.json +0 -2
  100. paddlex/inference/utils/io/__init__.py +1 -0
  101. paddlex/inference/utils/io/readers.py +46 -0
  102. paddlex/inference/utils/io/writers.py +2 -0
  103. paddlex/inference/utils/official_models.py +7 -0
  104. paddlex/inference/utils/pp_option.py +34 -18
  105. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -2
  106. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  107. paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  108. paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  109. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  110. paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  111. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  112. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  113. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  114. paddlex/modules/m_3d_bev_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  115. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  116. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  117. paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  118. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  119. paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  120. paddlex/modules/text_recognition/model_list.py +3 -0
  121. paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  122. paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  123. paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  124. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +27 -0
  125. paddlex/repo_manager/meta.py +3 -3
  126. paddlex/utils/device.py +4 -1
  127. paddlex/utils/download.py +10 -7
  128. paddlex/utils/{fonts/__init__.py → fonts.py} +45 -26
  129. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/METADATA +25 -1
  130. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/RECORD +134 -122
  131. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/LICENSE +0 -0
  132. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/WHEEL +0 -0
  133. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/entry_points.txt +0 -0
  134. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/top_level.txt +0 -0
@@ -46,7 +46,9 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
46
46
  pipeline = ctx.pipeline
47
47
 
48
48
  log_id = serving_utils.generate_log_id()
49
-
49
+ visualize_enabled = (
50
+ request.visualize if request.visualize is not None else ctx.config.visualize
51
+ )
50
52
  images, data_info = await ocr_common.get_images(request, ctx)
51
53
 
52
54
  result = await pipeline.call(
@@ -79,7 +81,7 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
79
81
  visual_info: List[dict] = []
80
82
  for i, (img, item) in enumerate(zip(images, result)):
81
83
  pruned_res = common.prune_result(item["layout_parsing_result"].json["res"])
82
- if ctx.config.visualize:
84
+ if visualize_enabled:
83
85
  imgs = {
84
86
  "input_img": img,
85
87
  **item["layout_parsing_result"].img,
@@ -127,19 +129,13 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
127
129
  ) -> AIStudioResultResponse[schema.BuildVectorStoreResult]:
128
130
  pipeline = ctx.pipeline
129
131
 
130
- kwargs: Dict[str, Any] = {
131
- "flag_save_bytes_vector": True,
132
- "retriever_config": request.retrieverConfig,
133
- }
134
- if request.minCharacters is not None:
135
- kwargs["min_characters"] = request.minCharacters
136
- if request.blockSize is not None:
137
- kwargs["block_size"] = request.blockSize
138
-
139
132
  vector_info = await serving_utils.call_async(
140
133
  pipeline.pipeline.build_vector,
141
134
  request.visualInfo,
142
- **kwargs,
135
+ min_characters=request.minCharacters,
136
+ block_size=request.blockSize,
137
+ flag_save_bytes_vector=True,
138
+ retriever_config=request.retrieverConfig,
143
139
  )
144
140
 
145
141
  return AIStudioResultResponse[schema.BuildVectorStoreResult](
@@ -185,8 +181,13 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
185
181
  ) -> AIStudioResultResponse[schema.ChatResult]:
186
182
  pipeline = ctx.pipeline
187
183
 
188
- kwargs: Dict[str, Any] = dict(
184
+ result = await serving_utils.call_async(
185
+ pipeline.pipeline.chat,
186
+ request.keyList,
187
+ request.visualInfo,
188
+ use_vector_retrieval=request.useVectorRetrieval,
189
189
  vector_info=request.vectorInfo,
190
+ min_characters=request.minCharacters,
190
191
  text_task_description=request.textTaskDescription,
191
192
  text_output_format=request.textOutputFormat,
192
193
  text_rules_str=request.textRulesStr,
@@ -197,22 +198,11 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
197
198
  table_rules_str=request.tableRulesStr,
198
199
  table_few_shot_demo_text_content=request.tableFewShotDemoTextContent,
199
200
  table_few_shot_demo_key_value_list=request.tableFewShotDemoKeyValueList,
201
+ mllm_predict_info=request.mllmPredictInfo,
202
+ mllm_integration_strategy=request.mllmIntegrationStrategy,
200
203
  chat_bot_config=request.chatBotConfig,
201
204
  retriever_config=request.retrieverConfig,
202
205
  )
203
- if request.useVectorRetrieval is not None:
204
- kwargs["use_vector_retrieval"] = request.useVectorRetrieval
205
- if request.minCharacters is not None:
206
- kwargs["min_characters"] = request.minCharacters
207
- if request.mllmIntegrationStrategy is not None:
208
- kwargs["mllm_integration_strategy"] = request.mllmIntegrationStrategy
209
-
210
- result = await serving_utils.call_async(
211
- pipeline.pipeline.chat,
212
- request.keyList,
213
- request.visualInfo,
214
- **kwargs,
215
- )
216
206
 
217
207
  return AIStudioResultResponse[schema.ChatResult](
218
208
  logId=serving_utils.generate_log_id(),
@@ -0,0 +1,203 @@
1
+ # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Any, Dict, List
16
+
17
+ from .....utils.deps import function_requires_deps, is_dep_available
18
+ from ...infra import utils as serving_utils
19
+ from ...infra.config import AppConfig
20
+ from ...infra.models import AIStudioResultResponse
21
+ from ...schemas import pp_doctranslation as schema
22
+ from .._app import create_app, primary_operation
23
+ from ._common import common
24
+ from ._common import ocr as ocr_common
25
+
26
+ if is_dep_available("fastapi"):
27
+ from fastapi import FastAPI
28
+
29
+
30
+ @function_requires_deps("fastapi")
31
+ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
32
+ pipeline.inintial_visual_predictor(pipeline.config)
33
+
34
+ app, ctx = create_app(
35
+ pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
36
+ )
37
+
38
+ ocr_common.update_app_context(ctx)
39
+
40
+ @primary_operation(
41
+ app,
42
+ schema.ANALYZE_IMAGES_ENDPOINT,
43
+ "analyzeImages",
44
+ )
45
+ async def _analyze_images(
46
+ request: schema.AnalyzeImagesRequest,
47
+ ) -> AIStudioResultResponse[schema.AnalyzeImagesResult]:
48
+ pipeline = ctx.pipeline
49
+
50
+ log_id = serving_utils.generate_log_id()
51
+ visualize_enabled = (
52
+ request.visualize if request.visualize is not None else ctx.config.visualize
53
+ )
54
+ images, data_info = await ocr_common.get_images(request, ctx)
55
+
56
+ result = await pipeline.call(
57
+ pipeline.pipeline.visual_predict,
58
+ images,
59
+ use_doc_orientation_classify=request.useDocOrientationClassify,
60
+ use_doc_unwarping=request.useDocUnwarping,
61
+ use_textline_orientation=request.useTextlineOrientation,
62
+ use_seal_recognition=request.useSealRecognition,
63
+ use_table_recognition=request.useTableRecognition,
64
+ use_formula_recognition=request.useFormulaRecognition,
65
+ use_chart_recognition=request.useChartRecognition,
66
+ use_region_detection=request.useRegionDetection,
67
+ layout_threshold=request.layoutThreshold,
68
+ layout_nms=request.layoutNms,
69
+ layout_unclip_ratio=request.layoutUnclipRatio,
70
+ layout_merge_bboxes_mode=request.layoutMergeBboxesMode,
71
+ text_det_limit_side_len=request.textDetLimitSideLen,
72
+ text_det_limit_type=request.textDetLimitType,
73
+ text_det_thresh=request.textDetThresh,
74
+ text_det_box_thresh=request.textDetBoxThresh,
75
+ text_det_unclip_ratio=request.textDetUnclipRatio,
76
+ text_rec_score_thresh=request.textRecScoreThresh,
77
+ seal_det_limit_side_len=request.sealDetLimitSideLen,
78
+ seal_det_limit_type=request.sealDetLimitType,
79
+ seal_det_thresh=request.sealDetThresh,
80
+ seal_det_box_thresh=request.sealDetBoxThresh,
81
+ seal_det_unclip_ratio=request.sealDetUnclipRatio,
82
+ seal_rec_score_thresh=request.sealRecScoreThresh,
83
+ use_wired_table_cells_trans_to_html=request.useWiredTableCellsTransToHtml,
84
+ use_wireless_table_cells_trans_to_html=request.useWirelessTableCellsTransToHtml,
85
+ use_table_orientation_classify=request.useTableOrientationClassify,
86
+ use_ocr_results_with_table_cells=request.useOcrResultsWithTableCells,
87
+ use_e2e_wired_table_rec_model=request.useE2eWiredTableRecModel,
88
+ use_e2e_wireless_table_rec_model=request.useE2eWirelessTableRecModel,
89
+ )
90
+
91
+ layout_parsing_results: List[Dict[str, Any]] = []
92
+ for i, (img, item) in enumerate(zip(images, result)):
93
+ pruned_res = common.prune_result(item["layout_parsing_result"].json["res"])
94
+ md_data = item["layout_parsing_result"].markdown
95
+ md_text = md_data["markdown_texts"]
96
+ md_imgs = await serving_utils.call_async(
97
+ common.postprocess_images,
98
+ md_data["markdown_images"],
99
+ log_id,
100
+ filename_template=f"markdown_{i}/{{key}}",
101
+ file_storage=ctx.extra["file_storage"],
102
+ return_urls=ctx.extra["return_img_urls"],
103
+ max_img_size=ctx.extra["max_output_img_size"],
104
+ )
105
+ md_flags = md_data["page_continuation_flags"]
106
+ if visualize_enabled:
107
+ imgs = {
108
+ "input_img": img,
109
+ **item["layout_parsing_result"].img,
110
+ }
111
+ imgs = await serving_utils.call_async(
112
+ common.postprocess_images,
113
+ imgs,
114
+ log_id,
115
+ filename_template=f"{{key}}_{i}.jpg",
116
+ file_storage=ctx.extra["file_storage"],
117
+ return_urls=ctx.extra["return_img_urls"],
118
+ max_img_size=ctx.extra["max_output_img_size"],
119
+ )
120
+ else:
121
+ imgs = {}
122
+ layout_parsing_results.append(
123
+ dict(
124
+ prunedResult=pruned_res,
125
+ markdown=dict(
126
+ text=md_text,
127
+ images=md_imgs,
128
+ isStart=md_flags[0],
129
+ isEnd=md_flags[1],
130
+ ),
131
+ outputImages=(
132
+ {k: v for k, v in imgs.items() if k != "input_img"}
133
+ if imgs
134
+ else None
135
+ ),
136
+ inputImage=imgs.get("input_img"),
137
+ )
138
+ )
139
+
140
+ return AIStudioResultResponse[schema.AnalyzeImagesResult](
141
+ logId=log_id,
142
+ result=schema.AnalyzeImagesResult(
143
+ layoutParsingResults=layout_parsing_results,
144
+ dataInfo=data_info,
145
+ ),
146
+ )
147
+
148
+ @primary_operation(
149
+ app,
150
+ schema.TRANSLATE_ENDPOINT,
151
+ "translate",
152
+ )
153
+ async def _translate(
154
+ request: schema.TranslateRequest,
155
+ ) -> AIStudioResultResponse[schema.TranslateResult]:
156
+ pipeline = ctx.pipeline
157
+
158
+ ori_md_info_list: List[Dict[str, Any]] = []
159
+ for i, item in enumerate(request.markdownList):
160
+ ori_md_info_list.append(
161
+ {
162
+ "input_path": None,
163
+ "page_index": i,
164
+ "markdown_texts": item.text,
165
+ "page_continuation_flags": (item.isStart, item.isEnd),
166
+ }
167
+ )
168
+
169
+ result = await serving_utils.call_async(
170
+ pipeline.pipeline.translate,
171
+ ori_md_info_list,
172
+ target_language=request.targetLanguage,
173
+ chunk_size=request.chunkSize,
174
+ task_description=request.taskDescription,
175
+ output_format=request.outputFormat,
176
+ rules_str=request.rulesStr,
177
+ few_shot_demo_text_content=request.fewShotDemoTextContent,
178
+ few_shot_demo_key_value_list=request.fewShotDemoKeyValueList,
179
+ chat_bot_config=request.chatBotConfig,
180
+ llm_request_interval=request.llmRequestInterval,
181
+ )
182
+
183
+ translation_results: List[Dict[str, Any]] = []
184
+ for item in result:
185
+ translation_results.append(
186
+ dict(
187
+ language=item["language"],
188
+ markdown=dict(
189
+ text=item["markdown_texts"],
190
+ isStart=item["page_continuation_flags"][0],
191
+ isEnd=item["page_continuation_flags"][1],
192
+ ),
193
+ )
194
+ )
195
+
196
+ return AIStudioResultResponse[schema.TranslateResult](
197
+ logId=serving_utils.generate_log_id(),
198
+ result=schema.TranslateResult(
199
+ translationResults=translation_results,
200
+ ),
201
+ )
202
+
203
+ return app
@@ -161,7 +161,9 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
161
161
  ) -> AIStudioResultResponse[schema.InferResult]:
162
162
  pipeline = ctx.pipeline
163
163
  aiohttp_session = ctx.aiohttp_session
164
-
164
+ visualize_enabled = (
165
+ request.visualize if request.visualize is not None else ctx.config.visualize
166
+ )
165
167
  image_bytes = await serving_utils.get_raw_bytes_async(
166
168
  request.image, aiohttp_session
167
169
  )
@@ -206,7 +208,7 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
206
208
  score=obj["det_score"],
207
209
  )
208
210
  )
209
- if ctx.config.visualize:
211
+ if visualize_enabled:
210
212
  output_image_base64 = serving_utils.base64_encode(
211
213
  serving_utils.image_to_bytes(result.img["res"])
212
214
  )
@@ -46,7 +46,9 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
46
46
  pipeline = ctx.pipeline
47
47
 
48
48
  log_id = serving_utils.generate_log_id()
49
-
49
+ visualize_enabled = (
50
+ request.visualize if request.visualize is not None else ctx.config.visualize
51
+ )
50
52
  images, data_info = await ocr_common.get_images(request, ctx)
51
53
 
52
54
  result = await pipeline.infer(
@@ -98,7 +100,7 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
98
100
  max_img_size=ctx.extra["max_output_img_size"],
99
101
  )
100
102
  md_flags = md_data["page_continuation_flags"]
101
- if ctx.config.visualize:
103
+ if visualize_enabled:
102
104
  imgs = {
103
105
  "input_img": img,
104
106
  **item.img,
@@ -43,7 +43,9 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
43
43
  async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]:
44
44
  pipeline = ctx.pipeline
45
45
  aiohttp_session = ctx.aiohttp_session
46
-
46
+ visualize_enabled = (
47
+ request.visualize if request.visualize is not None else ctx.config.visualize
48
+ )
47
49
  file_bytes = await serving_utils.get_raw_bytes_async(
48
50
  request.image, aiohttp_session
49
51
  )
@@ -66,7 +68,7 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
66
68
  score=obj["score"],
67
69
  )
68
70
  )
69
- if ctx.config.visualize:
71
+ if visualize_enabled:
70
72
  output_image_base64 = serving_utils.base64_encode(
71
73
  serving_utils.image_to_bytes(result.img["res"])
72
74
  )
@@ -44,7 +44,9 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
44
44
  pipeline = ctx.pipeline
45
45
 
46
46
  log_id = serving_utils.generate_log_id()
47
-
47
+ visualize_enabled = (
48
+ request.visualize if request.visualize is not None else ctx.config.visualize
49
+ )
48
50
  images, data_info = await ocr_common.get_images(request, ctx)
49
51
 
50
52
  result = await pipeline.infer(
@@ -67,7 +69,7 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
67
69
  seal_rec_results: List[Dict[str, Any]] = []
68
70
  for i, (img, item) in enumerate(zip(images, result)):
69
71
  pruned_res = common.prune_result(item.json["res"])
70
- if ctx.config.visualize:
72
+ if visualize_enabled:
71
73
  imgs = {
72
74
  "input_img": img,
73
75
  **item.img,
@@ -39,7 +39,9 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
39
39
  async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]:
40
40
  pipeline = ctx.pipeline
41
41
  aiohttp_session = ctx.aiohttp_session
42
-
42
+ visualize_enabled = (
43
+ request.visualize if request.visualize is not None else ctx.config.visualize
44
+ )
43
45
  file_bytes = await serving_utils.get_raw_bytes_async(
44
46
  request.image, aiohttp_session
45
47
  )
@@ -50,7 +52,7 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
50
52
  pred = result["pred"][0].tolist()
51
53
  size = [len(pred), len(pred[0])]
52
54
  label_map = [item for sublist in pred for item in sublist]
53
- if ctx.config.visualize:
55
+ if visualize_enabled:
54
56
  output_image_base64 = serving_utils.base64_encode(
55
57
  serving_utils.image_to_bytes(result.img["res"].convert("RGB"))
56
58
  )
@@ -39,7 +39,9 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
39
39
  async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]:
40
40
  pipeline = ctx.pipeline
41
41
  aiohttp_session = ctx.aiohttp_session
42
-
42
+ visualize_enabled = (
43
+ request.visualize if request.visualize is not None else ctx.config.visualize
44
+ )
43
45
  file_bytes = await serving_utils.get_raw_bytes_async(
44
46
  request.image, aiohttp_session
45
47
  )
@@ -57,7 +59,7 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
57
59
  score=obj["score"],
58
60
  )
59
61
  )
60
- if ctx.config.visualize:
62
+ if visualize_enabled:
61
63
  output_image_base64 = serving_utils.base64_encode(
62
64
  serving_utils.image_to_bytes(result.img["res"])
63
65
  )
@@ -44,7 +44,9 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
44
44
  pipeline = ctx.pipeline
45
45
 
46
46
  log_id = serving_utils.generate_log_id()
47
-
47
+ visualize_enabled = (
48
+ request.visualize if request.visualize is not None else ctx.config.visualize
49
+ )
48
50
  images, data_info = await ocr_common.get_images(request, ctx)
49
51
 
50
52
  result = await pipeline.infer(
@@ -65,7 +67,7 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
65
67
  table_rec_results: List[Dict[str, Any]] = []
66
68
  for i, (img, item) in enumerate(zip(images, result)):
67
69
  pruned_res = common.prune_result(item.json["res"])
68
- if ctx.config.visualize:
70
+ if visualize_enabled:
69
71
  imgs = {
70
72
  "input_img": img,
71
73
  **item.img,
@@ -44,7 +44,9 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
44
44
  pipeline = ctx.pipeline
45
45
 
46
46
  log_id = serving_utils.generate_log_id()
47
-
47
+ visualize_enabled = (
48
+ request.visualize if request.visualize is not None else ctx.config.visualize
49
+ )
48
50
  images, data_info = await ocr_common.get_images(request, ctx)
49
51
 
50
52
  result = await pipeline.infer(
@@ -70,7 +72,7 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
70
72
  table_rec_results: List[Dict[str, Any]] = []
71
73
  for i, (img, item) in enumerate(zip(images, result)):
72
74
  pruned_res = common.prune_result(item.json["res"])
73
- if ctx.config.visualize:
75
+ if visualize_enabled:
74
76
  imgs = {
75
77
  "input_img": img,
76
78
  **item.img,
@@ -39,7 +39,9 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
39
39
  async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]:
40
40
  pipeline = ctx.pipeline
41
41
  aiohttp_session = ctx.aiohttp_session
42
-
42
+ visualize_enabled = (
43
+ request.visualize if request.visualize is not None else ctx.config.visualize
44
+ )
43
45
  file_bytes = await serving_utils.get_raw_bytes_async(
44
46
  request.csv, aiohttp_session
45
47
  )
@@ -50,7 +52,7 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
50
52
  output_csv = serving_utils.base64_encode(
51
53
  serving_utils.data_frame_to_bytes(result["anomaly"])
52
54
  )
53
- if ctx.config.visualize:
55
+ if visualize_enabled:
54
56
  output_image = serving_utils.base64_encode(
55
57
  serving_utils.image_to_bytes(result.img["res"].convert("RGB"))
56
58
  )
@@ -39,7 +39,9 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
39
39
  async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]:
40
40
  pipeline = ctx.pipeline
41
41
  aiohttp_session = ctx.aiohttp_session
42
-
42
+ visualize_enabled = (
43
+ request.visualize if request.visualize is not None else ctx.config.visualize
44
+ )
43
45
  file_bytes = await serving_utils.get_raw_bytes_async(
44
46
  request.csv, aiohttp_session
45
47
  )
@@ -49,7 +51,7 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
49
51
 
50
52
  label = str(result["classification"].at[0, "classid"])
51
53
  score = float(result["classification"].at[0, "score"])
52
- if ctx.config.visualize:
54
+ if visualize_enabled:
53
55
  output_image = serving_utils.base64_encode(
54
56
  serving_utils.image_to_bytes(result.img["res"].convert("RGB"))
55
57
  )
@@ -39,7 +39,9 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
39
39
  async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]:
40
40
  pipeline = ctx.pipeline
41
41
  aiohttp_session = ctx.aiohttp_session
42
-
42
+ visualize_enabled = (
43
+ request.visualize if request.visualize is not None else ctx.config.visualize
44
+ )
43
45
  file_bytes = await serving_utils.get_raw_bytes_async(
44
46
  request.csv, aiohttp_session
45
47
  )
@@ -50,7 +52,7 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
50
52
  output_csv = serving_utils.base64_encode(
51
53
  serving_utils.data_frame_to_bytes(result["forecast"])
52
54
  )
53
- if ctx.config.visualize:
55
+ if visualize_enabled:
54
56
  output_image = serving_utils.base64_encode(
55
57
  serving_utils.image_to_bytes(result.img["res"].convert("RGB"))
56
58
  )
@@ -43,7 +43,9 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
43
43
  async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]:
44
44
  pipeline = ctx.pipeline
45
45
  aiohttp_session = ctx.aiohttp_session
46
-
46
+ visualize_enabled = (
47
+ request.visualize if request.visualize is not None else ctx.config.visualize
48
+ )
47
49
  file_bytes = await serving_utils.get_raw_bytes_async(
48
50
  request.image, aiohttp_session
49
51
  )
@@ -69,7 +71,7 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
69
71
  score=obj["det_score"],
70
72
  )
71
73
  )
72
- if ctx.config.visualize:
74
+ if visualize_enabled:
73
75
  output_image_base64 = serving_utils.base64_encode(
74
76
  serving_utils.image_to_bytes(result.img["res"])
75
77
  )
@@ -18,6 +18,7 @@ import io
18
18
  import mimetypes
19
19
  import re
20
20
  import tempfile
21
+ import threading
21
22
  import uuid
22
23
  from functools import partial
23
24
  from typing import Awaitable, Callable, List, Optional, Tuple, TypeVar, Union, overload
@@ -176,29 +177,33 @@ def base64_encode(data: bytes) -> str:
176
177
  return base64.b64encode(data).decode("ascii")
177
178
 
178
179
 
180
+ _lock = threading.Lock()
181
+
182
+
179
183
  @function_requires_deps("pypdfium2", "opencv-contrib-python")
180
184
  def read_pdf(
181
185
  bytes_: bytes, max_num_imgs: Optional[int] = None
182
186
  ) -> Tuple[List[np.ndarray], PDFInfo]:
183
187
  images: List[np.ndarray] = []
184
188
  page_info_list: List[PDFPageInfo] = []
185
- doc = pdfium.PdfDocument(bytes_)
186
- for page in doc:
187
- if max_num_imgs is not None and len(images) >= max_num_imgs:
188
- break
189
- # TODO: Do not always use zoom=2.0
190
- zoom = 2.0
191
- deg = 0
192
- image = page.render(scale=zoom, rotation=deg).to_pil()
193
- image = image.convert("RGB")
194
- image = np.array(image)
195
- image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
196
- images.append(image)
197
- page_info = PDFPageInfo(
198
- width=image.shape[1],
199
- height=image.shape[0],
200
- )
201
- page_info_list.append(page_info)
189
+ with _lock:
190
+ doc = pdfium.PdfDocument(bytes_)
191
+ for page in doc:
192
+ if max_num_imgs is not None and len(images) >= max_num_imgs:
193
+ break
194
+ # TODO: Do not always use zoom=2.0
195
+ zoom = 2.0
196
+ deg = 0
197
+ image = page.render(scale=zoom, rotation=deg).to_pil()
198
+ image = image.convert("RGB")
199
+ image = np.array(image)
200
+ image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
201
+ images.append(image)
202
+ page_info = PDFPageInfo(
203
+ width=image.shape[1],
204
+ height=image.shape[0],
205
+ )
206
+ page_info_list.append(page_info)
202
207
  pdf_info = PDFInfo(
203
208
  numPages=len(page_info_list),
204
209
  pages=page_info_list,
@@ -26,6 +26,7 @@ INFER_ENDPOINT: Final[str] = "/image-anomaly-detection"
26
26
 
27
27
  class InferRequest(BaseModel):
28
28
  image: str
29
+ visualize: Optional[bool] = None
29
30
 
30
31
 
31
32
  class InferResult(BaseModel):
@@ -35,6 +35,7 @@ class InferRequest(ocr.BaseInferRequest):
35
35
  # consistent with the parameters of the wrapped function though.
36
36
  useDocOrientationClassify: Optional[bool] = None
37
37
  useDocUnwarping: Optional[bool] = None
38
+ visualize: Optional[bool] = None
38
39
 
39
40
 
40
41
  class DocPreprocessingResult(BaseModel):
@@ -90,6 +90,7 @@ class InferRequest(BaseModel):
90
90
  recThreshold: Optional[float] = None
91
91
  hammingRadius: Optional[float] = None
92
92
  topk: Optional[int] = None
93
+ visualize: Optional[bool] = None
93
94
 
94
95
 
95
96
  class RecResult(BaseModel):
@@ -38,6 +38,7 @@ class InferRequest(ocr.BaseInferRequest):
38
38
  layoutNms: Optional[bool] = None
39
39
  layoutUnclipRatio: Optional[Union[float, Tuple[float, float]]] = None
40
40
  layoutMergeBboxesMode: Optional[str] = None
41
+ visualize: Optional[bool] = None
41
42
 
42
43
 
43
44
  class FormulaRecResult(BaseModel):
@@ -36,6 +36,7 @@ INFER_ENDPOINT: Final[str] = "/human-keypoint-detection"
36
36
  class InferRequest(BaseModel):
37
37
  image: str
38
38
  detThreshold: Optional[float] = None
39
+ visualize: Optional[bool] = None
39
40
 
40
41
 
41
42
  class Person(BaseModel):
@@ -33,6 +33,7 @@ INFER_ENDPOINT: Final[str] = "/image-classification"
33
33
  class InferRequest(BaseModel):
34
34
  image: str
35
35
  topk: Optional[Annotated[int, Field(gt=0)]] = None
36
+ visualize: Optional[bool] = None
36
37
 
37
38
 
38
39
  class InferResult(BaseModel):
@@ -32,6 +32,7 @@ INFER_ENDPOINT: Final[str] = "/multilabel-image-classification"
32
32
 
33
33
  class InferRequest(BaseModel):
34
34
  image: str
35
+ visualize: Optional[bool] = None
35
36
  threshold: Optional[
36
37
  Union[float, Dict[Union[Literal["default"], int], float], List[float]]
37
38
  ] = None