nv-ingest-api 2025.7.16.dev20250716__py3-none-any.whl → 2025.7.17.dev20250717__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of nv-ingest-api might be problematic.
Files changed (22)
  1. nv_ingest_api/interface/extract.py +18 -18
  2. nv_ingest_api/internal/extract/image/chart_extractor.py +75 -55
  3. nv_ingest_api/internal/extract/image/infographic_extractor.py +59 -35
  4. nv_ingest_api/internal/extract/image/table_extractor.py +81 -63
  5. nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +7 -7
  6. nv_ingest_api/internal/extract/pdf/engines/pdfium.py +9 -9
  7. nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +58 -0
  8. nv_ingest_api/internal/primitives/nim/model_interface/{paddle.py → ocr.py} +132 -39
  9. nv_ingest_api/internal/primitives/nim/nim_client.py +46 -11
  10. nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +6 -6
  11. nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +6 -6
  12. nv_ingest_api/internal/schemas/extract/extract_table_schema.py +5 -5
  13. nv_ingest_api/internal/transform/split_text.py +13 -8
  14. nv_ingest_api/util/image_processing/table_and_chart.py +97 -42
  15. nv_ingest_api/util/image_processing/transforms.py +16 -5
  16. nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +1 -1
  17. nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +51 -48
  18. {nv_ingest_api-2025.7.16.dev20250716.dist-info → nv_ingest_api-2025.7.17.dev20250717.dist-info}/METADATA +1 -1
  19. {nv_ingest_api-2025.7.16.dev20250716.dist-info → nv_ingest_api-2025.7.17.dev20250717.dist-info}/RECORD +22 -22
  20. {nv_ingest_api-2025.7.16.dev20250716.dist-info → nv_ingest_api-2025.7.17.dev20250717.dist-info}/WHEEL +0 -0
  21. {nv_ingest_api-2025.7.16.dev20250716.dist-info → nv_ingest_api-2025.7.17.dev20250717.dist-info}/licenses/LICENSE +0 -0
  22. {nv_ingest_api-2025.7.16.dev20250716.dist-info → nv_ingest_api-2025.7.17.dev20250717.dist-info}/top_level.txt +0 -0
nv_ingest_api/internal/extract/image/table_extractor.py

@@ -15,10 +15,11 @@ import pandas as pd
 
 from nv_ingest_api.internal.schemas.meta.ingest_job_schema import IngestTaskTableExtraction
 from nv_ingest_api.internal.enums.common import TableFormatEnum
-from nv_ingest_api.internal.primitives.nim.model_interface.paddle import PaddleOCRModelInterface
+from nv_ingest_api.internal.primitives.nim.model_interface.ocr import OCRModelInterface
+from nv_ingest_api.internal.primitives.nim.model_interface.ocr import get_ocr_model_name
 from nv_ingest_api.internal.schemas.extract.extract_table_schema import TableExtractorSchema
-from nv_ingest_api.util.image_processing.table_and_chart import join_yolox_table_structure_and_paddle_output
-from nv_ingest_api.util.image_processing.table_and_chart import convert_paddle_response_to_psuedo_markdown
+from nv_ingest_api.util.image_processing.table_and_chart import join_yolox_table_structure_and_ocr_output
+from nv_ingest_api.util.image_processing.table_and_chart import convert_ocr_response_to_psuedo_markdown
 from nv_ingest_api.internal.primitives.nim import NimClient
 from nv_ingest_api.internal.primitives.nim.model_interface.yolox import YoloxTableStructureModelInterface
 from nv_ingest_api.util.image_processing.transforms import base64_to_numpy
@@ -60,7 +61,8 @@ def _filter_valid_images(base64_images: List[str]) -> Tuple[List[str], List[np.n
 def _run_inference(
     enable_yolox: bool,
     yolox_client: Any,
-    paddle_client: Any,
+    ocr_client: Any,
+    ocr_model_name: str,
     valid_arrays: List[np.ndarray],
     valid_images: List[str],
     trace_info: Optional[Dict] = None,
@@ -68,31 +70,42 @@ def _run_inference(
     """
     Run inference concurrently for YOLOX (if enabled) and Paddle.
 
-    Returns a tuple of (yolox_results, paddle_results).
+    Returns a tuple of (yolox_results, ocr_results).
     """
-    data_paddle = {"base64_images": valid_images}
+    data_ocr = {"base64_images": valid_images}
     if enable_yolox:
         data_yolox = {"images": valid_arrays}
+        future_yolox_kwargs = dict(
+            data=data_yolox,
+            model_name="yolox",
+            stage_name="table_extraction",
+            max_batch_size=8,
+            trace_info=trace_info,
+        )
+
+    future_ocr_kwargs = dict(
+        data=data_ocr,
+        stage_name="table_extraction",
+        max_batch_size=1 if ocr_client.protocol == "grpc" else 2,
+        trace_info=trace_info,
+    )
+    if ocr_model_name == "paddle":
+        future_ocr_kwargs.update(
+            model_name="paddle",
+        )
+    else:
+        future_ocr_kwargs.update(
+            model_name="scene_text",
+            input_names=["input", "merge_levels"],
+            dtypes=["FP32", "BYTES"],
+            merge_level="word",
+        )
 
     with ThreadPoolExecutor(max_workers=2) as executor:
         future_yolox = None
         if enable_yolox:
-            future_yolox = executor.submit(
-                yolox_client.infer,
-                data=data_yolox,
-                model_name="yolox",
-                stage_name="table_extraction",
-                max_batch_size=8,
-                trace_info=trace_info,
-            )
-        future_paddle = executor.submit(
-            paddle_client.infer,
-            data=data_paddle,
-            model_name="paddle",
-            stage_name="table_extraction",
-            max_batch_size=1 if paddle_client.protocol == "grpc" else 2,
-            trace_info=trace_info,
-        )
+            future_yolox = executor.submit(yolox_client.infer, **future_yolox_kwargs)
+        future_ocr = executor.submit(ocr_client.infer, **future_ocr_kwargs)
 
         if enable_yolox:
             try:
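
Note: the hunk above replaces two inline executor.submit calls with kwargs dicts assembled up front, so the paddle vs. scene_text dispatch lives in one place and the submit call stays uniform. The following self-contained sketch illustrates the same pattern; the toy client and the build_ocr_kwargs helper are illustrative stand-ins, not package API:

    # Illustrative only: a toy client standing in for NimClient.
    from concurrent.futures import ThreadPoolExecutor

    class ToyOCRClient:
        protocol = "grpc"

        def infer(self, **kwargs):
            # A real NimClient would run inference; here we just echo the kwarg names.
            return sorted(kwargs)

    def build_ocr_kwargs(ocr_model_name, data_ocr, client):
        # Shared kwargs first, then model-specific ones, mirroring the diff.
        kwargs = dict(
            data=data_ocr,
            stage_name="table_extraction",
            max_batch_size=1 if client.protocol == "grpc" else 2,
        )
        if ocr_model_name == "paddle":
            kwargs.update(model_name="paddle")
        else:
            kwargs.update(
                model_name="scene_text",
                input_names=["input", "merge_levels"],
                dtypes=["FP32", "BYTES"],
                merge_level="word",
            )
        return kwargs

    client = ToyOCRClient()
    with ThreadPoolExecutor(max_workers=2) as pool:
        future = pool.submit(client.infer, **build_ocr_kwargs("scene_text", {"base64_images": []}, client))
        print(future.result())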
@@ -104,17 +117,17 @@ def _run_inference(
                 yolox_results = [None] * len(valid_images)
 
         try:
-            paddle_results = future_paddle.result()
+            ocr_results = future_ocr.result()
         except Exception as e:
-            logger.error(f"Error calling paddle_client.infer: {e}", exc_info=True)
+            logger.error(f"Error calling ocr_client.infer: {e}", exc_info=True)
             raise
 
-    return yolox_results, paddle_results
+    return yolox_results, ocr_results
 
 
 def _validate_inference_results(
     yolox_results: Any,
-    paddle_results: Any,
+    ocr_results: Any,
     valid_arrays: List[Any],
     valid_images: List[str],
 ) -> Tuple[List[Any], List[Any]]:
@@ -123,46 +136,47 @@ def _validate_inference_results(
 
     If not, default values are assigned. Raises a ValueError if the lengths do not match.
     """
-    if not isinstance(yolox_results, list) or not isinstance(paddle_results, list):
+    if not isinstance(yolox_results, list) or not isinstance(ocr_results, list):
         logger.warning(
-            "Unexpected result types from inference clients: yolox_results=%s, paddle_results=%s. "
+            "Unexpected result types from inference clients: yolox_results=%s, ocr_results=%s. "
            "Proceeding with available results.",
            type(yolox_results).__name__,
-            type(paddle_results).__name__,
+            type(ocr_results).__name__,
        )
        if not isinstance(yolox_results, list):
            yolox_results = [None] * len(valid_arrays)
-        if not isinstance(paddle_results, list):
-            paddle_results = [(None, None)] * len(valid_images)
+        if not isinstance(ocr_results, list):
+            ocr_results = [(None, None)] * len(valid_images)
 
     if len(yolox_results) != len(valid_arrays):
         raise ValueError(f"Expected {len(valid_arrays)} yolox results, got {len(yolox_results)}")
-    if len(paddle_results) != len(valid_images):
-        raise ValueError(f"Expected {len(valid_images)} paddle results, got {len(paddle_results)}")
+    if len(ocr_results) != len(valid_images):
+        raise ValueError(f"Expected {len(valid_images)} ocr results, got {len(ocr_results)}")
 
-    return yolox_results, paddle_results
+    return yolox_results, ocr_results
 
 
 def _update_table_metadata(
     base64_images: List[str],
     yolox_client: Any,
-    paddle_client: Any,
+    ocr_client: Any,
+    ocr_model_name: str,
     worker_pool_size: int = 8,  # Not currently used
     enable_yolox: bool = False,
     trace_info: Optional[Dict] = None,
 ) -> List[Tuple[str, Any, Any, Any]]:
     """
     Given a list of base64-encoded images, this function filters out images that do not meet
-    the minimum size requirements and then calls the PaddleOCR model via paddle_client.infer
+    the minimum size requirements and then calls the OCR model via ocr_client.infer
     to extract table data.
 
     For each base64-encoded image, the result is a tuple:
-        (base64_image, yolox_result, paddle_text_predictions, paddle_bounding_boxes)
+        (base64_image, yolox_result, ocr_text_predictions, ocr_bounding_boxes)
 
     Images that do not meet the minimum size are skipped (resulting in placeholders).
-    The paddle_client is expected to handle any necessary batching and concurrency.
+    The ocr_client is expected to handle any necessary batching and concurrency.
     """
-    logger.debug(f"Running table extraction using protocol {paddle_client.protocol}")
+    logger.debug(f"Running table extraction using protocol {ocr_client.protocol}")
 
     # Initialize the results list with default placeholders.
     results: List[Tuple[str, Any, Any, Any]] = [("", None, None, None)] * len(base64_images)
@@ -174,24 +188,23 @@ def _update_table_metadata(
         return results
 
     # Run inference concurrently.
-    yolox_results, paddle_results = _run_inference(
+    yolox_results, ocr_results = _run_inference(
         enable_yolox=enable_yolox,
         yolox_client=yolox_client,
-        paddle_client=paddle_client,
+        ocr_client=ocr_client,
+        ocr_model_name=ocr_model_name,
         valid_arrays=valid_arrays,
         valid_images=valid_images,
         trace_info=trace_info,
     )
 
     # Validate that the inference results have the expected structure.
-    yolox_results, paddle_results = _validate_inference_results(
-        yolox_results, paddle_results, valid_arrays, valid_images
-    )
+    yolox_results, ocr_results = _validate_inference_results(yolox_results, ocr_results, valid_arrays, valid_images)
 
     # Combine results with the original order.
-    for idx, (yolox_res, paddle_res) in enumerate(zip(yolox_results, paddle_results)):
+    for idx, (yolox_res, ocr_res) in enumerate(zip(yolox_results, ocr_results)):
         original_index = valid_indices[idx]
-        results[original_index] = (base64_images[original_index], yolox_res, paddle_res[0], paddle_res[1])
+        results[original_index] = (base64_images[original_index], yolox_res, ocr_res[0], ocr_res[1])
 
     return results
 
@@ -199,14 +212,14 @@ def _update_table_metadata(
 def _create_clients(
     yolox_endpoints: Tuple[str, str],
     yolox_protocol: str,
-    paddle_endpoints: Tuple[str, str],
-    paddle_protocol: str,
+    ocr_endpoints: Tuple[str, str],
+    ocr_protocol: str,
     auth_token: str,
 ) -> Tuple[NimClient, NimClient]:
     yolox_model_interface = YoloxTableStructureModelInterface()
-    paddle_model_interface = PaddleOCRModelInterface()
+    ocr_model_interface = OCRModelInterface()
 
-    logger.debug(f"Inference protocols: yolox={yolox_protocol}, paddle={paddle_protocol}")
+    logger.debug(f"Inference protocols: yolox={yolox_protocol}, ocr={ocr_protocol}")
 
     yolox_client = create_inference_client(
         endpoints=yolox_endpoints,
@@ -215,14 +228,14 @@ def _create_clients(
         infer_protocol=yolox_protocol,
     )
 
-    paddle_client = create_inference_client(
-        endpoints=paddle_endpoints,
-        model_interface=paddle_model_interface,
+    ocr_client = create_inference_client(
+        endpoints=ocr_endpoints,
+        model_interface=ocr_model_interface,
         auth_token=auth_token,
-        infer_protocol=paddle_protocol,
+        infer_protocol=ocr_protocol,
     )
 
-    return yolox_client, paddle_client
+    return yolox_client, ocr_client
 
 
 def extract_table_data_from_image_internal(
@@ -262,14 +275,18 @@ def extract_table_data_from_image_internal(
         return df_extraction_ledger, execution_trace_log
 
     endpoint_config = extraction_config.endpoint_config
-    yolox_client, paddle_client = _create_clients(
+    yolox_client, ocr_client = _create_clients(
         endpoint_config.yolox_endpoints,
         endpoint_config.yolox_infer_protocol,
-        endpoint_config.paddle_endpoints,
-        endpoint_config.paddle_infer_protocol,
+        endpoint_config.ocr_endpoints,
+        endpoint_config.ocr_infer_protocol,
         endpoint_config.auth_token,
     )
 
+    # Get the grpc endpoint to determine the model if needed
+    ocr_grpc_endpoint = endpoint_config.ocr_endpoints[0]
+    ocr_model_name = get_ocr_model_name(ocr_grpc_endpoint)
+
     try:
         # 1) Identify rows that meet criteria (structured, subtype=table, table_metadata != None, content not empty)
         def meets_criteria(row):
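
Note: get_ocr_model_name is imported from model_interface/ocr.py, whose internals are not part of this diff; only its call site appears above (a gRPC endpoint in, a model name such as "paddle" or "scene_text" out, per the dispatch in _run_inference). The sketch below is purely hypothetical and only illustrates that implied contract; the env-var override and fallback behavior are assumptions, not the package's actual implementation:

    # Hypothetical sketch only; the real get_ocr_model_name is not shown in this diff.
    import os

    def get_ocr_model_name_sketch(grpc_endpoint: str, default: str = "paddle") -> str:
        # An environment override is one plausible mechanism (assumption, not confirmed).
        override = os.environ.get("OCR_MODEL_NAME")
        if override:
            return override
        if not grpc_endpoint:
            # Nothing to probe for HTTP-only deployments; fall back to the default.
            return default
        # A real implementation might query the endpoint's model metadata here.
        return default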
@@ -309,7 +326,8 @@ def extract_table_data_from_image_internal(
         bulk_results = _update_table_metadata(
             base64_images=base64_images,
             yolox_client=yolox_client,
-            paddle_client=paddle_client,
+            ocr_client=ocr_client,
+            ocr_model_name=ocr_model_name,
             worker_pool_size=endpoint_config.workers_per_progress_engine,
             enable_yolox=enable_yolox,
             trace_info=execution_trace_log,
@@ -317,15 +335,15 @@ def extract_table_data_from_image_internal(
 
         # 4) Write the results (bounding_boxes, text_predictions) back
         for row_id, idx in enumerate(valid_indices):
-            # unpack (base64_image, (yolox_predictions, paddle_bounding_boxes, paddle_text_predictions))
+            # unpack (base64_image, (yolox_predictions, ocr_bounding_boxes, ocr_text_predictions))
             _, cell_predictions, bounding_boxes, text_predictions = bulk_results[row_id]
 
             if table_content_format == TableFormatEnum.SIMPLE:
                 table_content = " ".join(text_predictions)
             elif table_content_format == TableFormatEnum.PSEUDO_MARKDOWN:
-                table_content = convert_paddle_response_to_psuedo_markdown(bounding_boxes, text_predictions)
+                table_content = convert_ocr_response_to_psuedo_markdown(bounding_boxes, text_predictions)
             elif table_content_format == TableFormatEnum.MARKDOWN:
-                table_content = join_yolox_table_structure_and_paddle_output(
+                table_content = join_yolox_table_structure_and_ocr_output(
                     cell_predictions, bounding_boxes, text_predictions
                 )
             else:
@@ -341,4 +359,4 @@ def extract_table_data_from_image_internal(
         raise
     finally:
         yolox_client.close()
-        paddle_client.close()
+        ocr_client.close()
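
Note: taken together, the table_extractor.py changes are a mechanical paddle -> ocr rename plus the new ocr_model_name plumbing. For downstream code, the visible migration surface is the import path and the endpoint config fields, as in this sketch (the commented-out lines show the pre-2025.7.17 names from this diff):

    # Before (2025.7.16.dev):
    # from nv_ingest_api.internal.primitives.nim.model_interface.paddle import PaddleOCRModelInterface
    # ...uses endpoint_config.paddle_endpoints, endpoint_config.paddle_infer_protocol

    # After (2025.7.17.dev):
    from nv_ingest_api.internal.primitives.nim.model_interface.ocr import OCRModelInterface
    from nv_ingest_api.internal.primitives.nim.model_interface.ocr import get_ocr_model_name
    # ...uses endpoint_config.ocr_endpoints, endpoint_config.ocr_infer_protocol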
nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py

@@ -101,7 +101,7 @@ def nemoretriever_parse_extractor(
         - text_depth : str, optional (default is "page")
         - extract_tables_method : str, optional (default is "yolox")
         - identify_nearby_objects : bool, optional (default is True)
-        - paddle_output_format : str, optional (default is "pseudo_markdown")
+        - table_output_format : str, optional (default is "pseudo_markdown")
         - pdfium_config : dict, optional (configuration for PDFium)
         - nemoretriever_parse_config : dict, optional (configuration for NemoRetrieverParse)
         - metadata_column : str, optional (default is "metadata")
@@ -146,14 +146,14 @@ def nemoretriever_parse_extractor(
     # Flag for identifying nearby objects.
     identify_nearby_objects = extractor_config.get("identify_nearby_objects", True)
 
-    # Get and validate paddle_output_format.
-    paddle_output_format_str = extractor_config.get("paddle_output_format", "pseudo_markdown")
+    # Get and validate table_output_format.
+    table_output_format_str = extractor_config.get("table_output_format", "pseudo_markdown")
     try:
-        paddle_output_format = TableFormatEnum[paddle_output_format_str.upper()]
+        table_output_format = TableFormatEnum[table_output_format_str.upper()]
     except KeyError:
         valid_options = [e.name.lower() for e in TableFormatEnum]
         raise ValueError(
-            f"Invalid paddle_output_format value: {paddle_output_format_str}. Expected one of: {valid_options}"
+            f"Invalid table_output_format value: {table_output_format_str}. Expected one of: {valid_options}"
         )
 
     # Process nemoretriever_parse configuration.
@@ -254,7 +254,7 @@ def nemoretriever_parse_extractor(
                 extract_tables,
                 extract_charts,
                 extract_infographics,
-                paddle_output_format,
+                table_output_format,
                 nemoretriever_parse_config.yolox_endpoints,
                 nemoretriever_parse_config.yolox_infer_protocol,
                 nemoretriever_parse_config.auth_token,
@@ -288,7 +288,7 @@ def nemoretriever_parse_extractor(
                 extract_tables,
                 extract_charts,
                 extract_infographics,
-                paddle_output_format,
+                table_output_format,
                 nemoretriever_parse_config.yolox_endpoints,
                 nemoretriever_parse_config.yolox_infer_protocol,
                 nemoretriever_parse_config.auth_token,
nv_ingest_api/internal/extract/pdf/engines/pdfium.py

@@ -267,7 +267,7 @@ def _extract_page_elements(
     extract_tables: bool,
     extract_charts: bool,
     extract_infographics: bool,
-    paddle_output_format: str,
+    table_output_format: str,
     yolox_endpoints: Tuple[Optional[str], Optional[str]],
     yolox_infer_protocol: str = "http",
     auth_token: Optional[str] = None,
@@ -296,7 +296,7 @@ def _extract_page_elements(
         Flag indicating whether to extract charts.
     extract_infographics : bool
         Flag indicating whether to extract infographics.
-    paddle_output_format : str
+    table_output_format : str
         Format to use for table content.
     yolox_endpoints : Tuple[Optional[str], Optional[str]]
         A tuple containing the gRPC and HTTP endpoints for the YOLOX service.
@@ -355,7 +355,7 @@ def _extract_page_elements(
 
         # Set content format for tables
         if page_element.type_string == "table":
-            page_element.content_format = paddle_output_format
+            page_element.content_format = table_output_format
 
         # Construct metadata for the page element
         page_element_meta = construct_page_element_metadata(
@@ -412,13 +412,13 @@ def pdfium_extractor(
             f"Invalid text_depth: {text_depth_str}. Valid options: {list(TextTypeEnum.__members__.keys())}"
         )
 
-    # Validate and extract paddle_output_format
-    paddle_output_format_str = extractor_config.get("paddle_output_format", "pseudo_markdown")
+    # Validate and extract table_output_format
+    table_output_format_str = extractor_config.get("table_output_format", "pseudo_markdown")
     try:
-        paddle_output_format = TableFormatEnum[paddle_output_format_str.upper()]
+        table_output_format = TableFormatEnum[table_output_format_str.upper()]
     except KeyError:
         raise ValueError(
-            f"Invalid paddle_output_format: {paddle_output_format_str}. "
+            f"Invalid table_output_format: {table_output_format_str}. "
             f"Valid options: {list(TableFormatEnum.__members__.keys())}"
         )
 
@@ -568,7 +568,7 @@ def pdfium_extractor(
                 extract_tables,
                 extract_charts,
                 extract_infographics,
-                paddle_output_format,
+                table_output_format,
                 pdfium_config.yolox_endpoints,
                 pdfium_config.yolox_infer_protocol,
                 pdfium_config.auth_token,
@@ -590,7 +590,7 @@ def pdfium_extractor(
                 extract_tables,
                 extract_charts,
                 extract_infographics,
-                paddle_output_format,
+                table_output_format,
                 pdfium_config.yolox_endpoints,
                 pdfium_config.yolox_infer_protocol,
                 pdfium_config.auth_token,
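
Note: the same rename applies to the extractor_config dict accepted by both pdfium_extractor and nemoretriever_parse_extractor: the key paddle_output_format becomes table_output_format. Values are looked up case-insensitively against TableFormatEnum, whose members visible in this diff are SIMPLE, PSEUDO_MARKDOWN, and MARKDOWN. A minimal before/after config:

    extractor_config = {
        # "paddle_output_format": "pseudo_markdown",  # old key (2025.7.16.dev)
        "table_output_format": "pseudo_markdown",     # new key (2025.7.17.dev)
    }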
nv_ingest_api/internal/primitives/nim/model_interface/helpers.py

@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import logging
+from typing import Optional
 
 import backoff
 import cv2
@@ -81,6 +82,63 @@ def preprocess_image_for_paddle(array: np.ndarray, image_max_dimension: int = 96
     return transposed, metadata
 
 
+def preprocess_image_for_ocr(
+    array: np.ndarray,
+    target_height: Optional[int] = None,
+    target_width: Optional[int] = None,
+    pad_how: str = "bottom_right",
+) -> np.ndarray:
+    """
+    Preprocesses an input image to be suitable for use with NemoRetriever-OCR.
+
+    This function is intended for preprocessing images to be passed as input to NemoRetriever-OCR using GRPC.
+    It is not necessary when using the HTTP endpoint.
+
+    Parameters:
+    ----------
+    array : np.ndarray
+        The input image array of shape (height, width, channels). It should have pixel values in the range [0, 255].
+
+    Returns:
+    -------
+    np.ndarray
+        A preprocessed image with the shape (channels, height, width).
+    """
+    height, width = array.shape[:2]
+
+    if target_height is None:
+        target_height = height
+
+    if target_width is None:
+        target_width = width
+
+    padded, (pad_width, pad_height) = pad_image(
+        array,
+        target_height=target_height,
+        target_width=target_width,
+        background_color=255,
+        dtype=np.float32,
+        how=pad_how,
+    )
+
+    padded = padded / 255.0
+
+    # NemoRetriever-OCR NIM (GRPC) requires input to be (channel, height, width).
+    transposed = padded.transpose((2, 0, 1))
+
+    # Metadata can be used for inverting transformations on the resulting bounding boxes.
+    metadata = {
+        "original_height": height,
+        "original_width": width,
+        "new_height": target_height,
+        "new_width": target_width,
+        "pad_height": pad_height,
+        "pad_width": pad_width,
+    }
+
+    return transposed, metadata
+
+
 def is_ready(http_endpoint: str, ready_endpoint: str) -> bool:
     """
     Check if the server at the given endpoint is ready.
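
Note: a usage sketch for the new preprocess_image_for_ocr helper follows. Observe that, like preprocess_image_for_paddle above it, the function actually returns a (array, metadata) tuple, even though its docstring advertises a bare np.ndarray. The target dimensions below are arbitrary example values, and the expected output shape assumes pad_image pads the image up to the requested size, as its call in the diff suggests:

    import numpy as np
    from nv_ingest_api.internal.primitives.nim.model_interface.helpers import preprocess_image_for_ocr

    # An HWC uint8 RGB image, as the helper expects (pixel values in [0, 255]).
    image = (np.random.rand(480, 640, 3) * 255).astype(np.uint8)

    chw, meta = preprocess_image_for_ocr(image, target_height=512, target_width=704)
    assert chw.shape == (3, 512, 704)  # (channels, height, width), float32 scaled to [0, 1]
    # meta carries original/new sizes and pad offsets for mapping boxes back.
    print(meta["original_height"], meta["new_height"], meta["pad_height"])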