nv-ingest-api 25.7.6.dev20250706__py3-none-any.whl → 25.8.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

Files changed (33)
  1. nv_ingest_api/interface/extract.py +18 -18
  2. nv_ingest_api/internal/enums/common.py +6 -0
  3. nv_ingest_api/internal/extract/image/chart_extractor.py +80 -75
  4. nv_ingest_api/internal/extract/image/image_helpers/common.py +5 -6
  5. nv_ingest_api/internal/extract/image/infographic_extractor.py +59 -35
  6. nv_ingest_api/internal/extract/image/table_extractor.py +84 -64
  7. nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +9 -8
  8. nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +32 -20
  9. nv_ingest_api/internal/extract/pdf/engines/pdfium.py +40 -29
  10. nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +59 -0
  11. nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +1 -0
  12. nv_ingest_api/internal/primitives/nim/model_interface/{paddle.py → ocr.py} +132 -39
  13. nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +44 -236
  14. nv_ingest_api/internal/primitives/nim/nim_client.py +61 -18
  15. nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +6 -6
  16. nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +6 -6
  17. nv_ingest_api/internal/schemas/extract/extract_table_schema.py +5 -5
  18. nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +5 -0
  19. nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +2 -2
  20. nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +4 -0
  21. nv_ingest_api/internal/transform/embed_text.py +103 -12
  22. nv_ingest_api/internal/transform/split_text.py +13 -8
  23. nv_ingest_api/util/image_processing/table_and_chart.py +97 -42
  24. nv_ingest_api/util/image_processing/transforms.py +351 -87
  25. nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +1 -1
  26. nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +51 -48
  27. nv_ingest_api/util/metadata/aggregators.py +4 -1
  28. nv_ingest_api/util/pdf/pdfium.py +6 -14
  29. {nv_ingest_api-25.7.6.dev20250706.dist-info → nv_ingest_api-25.8.0rc1.dist-info}/METADATA +2 -1
  30. {nv_ingest_api-25.7.6.dev20250706.dist-info → nv_ingest_api-25.8.0rc1.dist-info}/RECORD +33 -33
  31. {nv_ingest_api-25.7.6.dev20250706.dist-info → nv_ingest_api-25.8.0rc1.dist-info}/WHEEL +0 -0
  32. {nv_ingest_api-25.7.6.dev20250706.dist-info → nv_ingest_api-25.8.0rc1.dist-info}/licenses/LICENSE +0 -0
  33. {nv_ingest_api-25.7.6.dev20250706.dist-info → nv_ingest_api-25.8.0rc1.dist-info}/top_level.txt +0 -0
@@ -2,29 +2,55 @@
2
2
  # All rights reserved.
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
- import base64
6
- import io
7
5
  import logging
8
- from io import BytesIO
9
6
  from math import ceil
10
7
  from math import floor
11
8
  from typing import Optional
12
9
  from typing import Tuple
13
10
 
11
+ import cv2
14
12
  import numpy as np
13
+ from io import BytesIO
15
14
  from PIL import Image
16
- from PIL import UnidentifiedImageError
17
15
 
18
16
  from nv_ingest_api.util.converters import bytetools
19
17
 
18
+ # Configure OpenCV to use a single thread for image processing
19
+ cv2.setNumThreads(1)
20
20
  DEFAULT_MAX_WIDTH = 1024
21
21
  DEFAULT_MAX_HEIGHT = 1280
22
22
 
23
+ # Workaround for PIL.Image.DecompressionBombError
24
+ Image.MAX_IMAGE_PIXELS = None
25
+
23
26
  logger = logging.getLogger(__name__)
24
27
 
25
28
 
29
+ def _resize_image_opencv(
30
+ array: np.ndarray, target_size: Tuple[int, int], interpolation=cv2.INTER_LANCZOS4
31
+ ) -> np.ndarray:
32
+ """
33
+ Resizes a NumPy array representing an image using OpenCV.
34
+
35
+ Parameters
36
+ ----------
37
+ array : np.ndarray
38
+ The input image as a NumPy array.
39
+ target_size : Tuple[int, int]
40
+ The target size as (width, height).
41
+ interpolation : int, optional
42
+ OpenCV interpolation method. Defaults to cv2.INTER_LANCZOS4.
43
+
44
+ Returns
45
+ -------
46
+ np.ndarray
47
+ The resized image as a NumPy array.
48
+ """
49
+ return cv2.resize(array, target_size, interpolation=interpolation)
50
+
51
+
26
52
  def scale_image_to_encoding_size(
27
- base64_image: str, max_base64_size: int = 180_000, initial_reduction: float = 0.9
53
+ base64_image: str, max_base64_size: int = 180_000, initial_reduction: float = 0.9, format: str = "PNG", **kwargs
28
54
  ) -> Tuple[str, Tuple[int, int]]:
29
55
  """
30
56
  Decodes a base64-encoded image, resizes it if needed, and re-encodes it as base64.
@@ -38,12 +64,19 @@ def scale_image_to_encoding_size(
38
64
  Maximum allowable size for the base64-encoded image, by default 180,000 characters.
39
65
  initial_reduction : float, optional
40
66
  Initial reduction step for resizing, by default 0.9.
67
+ format : str, optional
68
+ The image format to use for encoding. Supported formats are "PNG" and "JPEG".
69
+ Defaults to "PNG".
70
+ **kwargs
71
+ Additional keyword arguments passed to the format-specific encoding function.
72
+ For JPEG: quality (int, default=100) - JPEG quality (1-100).
73
+ For PNG: compression (int, default=3) - PNG compression level (0-9).
41
74
 
42
75
  Returns
43
76
  -------
44
77
  Tuple[str, Tuple[int, int]]
45
78
  A tuple containing:
46
- - Base64-encoded PNG image string, resized if necessary.
79
+ - Base64-encoded image string in the specified format, resized if necessary.
47
80
  - The new size as a tuple (width, height).
48
81
 
49
82
  Raises
@@ -52,12 +85,11 @@ def scale_image_to_encoding_size(
52
85
  If the image cannot be resized below the specified max_base64_size.
53
86
  """
54
87
  try:
55
- # Decode the base64 image and open it as a PIL image
56
- image_data = base64.b64decode(base64_image)
57
- img = Image.open(io.BytesIO(image_data)).convert("RGB")
88
+ # Decode the base64 image using OpenCV (returns RGB format)
89
+ img_array = base64_to_numpy(base64_image)
58
90
 
59
- # Initial image size
60
- original_size = img.size
91
+ # Initial image size (height, width, channels) -> (width, height)
92
+ original_size = (img_array.shape[1], img_array.shape[0])
61
93
 
62
94
  # Check initial size
63
95
  if len(base64_image) <= max_base64_size:
@@ -66,23 +98,24 @@ def scale_image_to_encoding_size(
66
98
  # Initial reduction step
67
99
  reduction_step = initial_reduction
68
100
  new_size = original_size
101
+ current_img = img_array.copy()
102
+ original_width, original_height = original_size
103
+
69
104
  while len(base64_image) > max_base64_size:
70
- width, height = img.size
71
- new_size = (int(width * reduction_step), int(height * reduction_step))
105
+ new_size = (int(original_width * reduction_step), int(original_height * reduction_step))
106
+ if new_size[0] < 1 or new_size[1] < 1:
107
+ raise ValueError("Image cannot be resized further without becoming too small.")
108
+
109
+ # Resize the image using OpenCV
110
+ current_img = _resize_image_opencv(img_array, new_size)
72
111
 
73
- img_resized = img.resize(new_size, Image.LANCZOS)
74
- buffered = io.BytesIO()
75
- img_resized.save(buffered, format="PNG")
76
- base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
112
+ # Re-encode as base64 using the specified format
113
+ base64_image = numpy_to_base64(current_img, format=format, **kwargs)
77
114
 
78
115
  # Adjust the reduction step if necessary
79
116
  if len(base64_image) > max_base64_size:
80
117
  reduction_step *= 0.95 # Reduce size further if needed
81
118
 
82
- # Safety check
83
- if new_size[0] < 1 or new_size[1] < 1:
84
- raise Exception("Image cannot be resized further without becoming too small.")
85
-
86
119
  return base64_image, new_size
87
120
 
88
121
  except Exception as e:
@@ -90,36 +123,84 @@ def scale_image_to_encoding_size(
90
123
  raise
91
124
 
92
125
 
93
- def ensure_base64_is_png(base64_image: str) -> str:
126
+ def _detect_base64_image_format(base64_string: str) -> Optional[str]:
94
127
  """
95
- Ensures the given base64-encoded image is in PNG format. Converts to PNG if necessary.
128
+ Detects the format of a base64-encoded image using Pillow.
96
129
 
97
130
  Parameters
98
131
  ----------
99
- base64_image : str
132
+ base64_string : str
100
133
  Base64-encoded image string.
101
134
 
102
135
  Returns
103
136
  -------
104
- str
105
- Base64-encoded PNG image string.
137
+ The detected format ("PNG", "JPEG", "UNKNOWN")
106
138
  """
107
139
  try:
108
- # Decode the base64 string and load the image
109
- image_data = base64.b64decode(base64_image)
110
- image = Image.open(io.BytesIO(image_data))
140
+ image_bytes = bytetools.bytesfrombase64(base64_string)
141
+ except Exception as e:
142
+ logger.error(f"Invalid base64 string: {e}")
143
+ raise ValueError(f"Invalid base64 string: {e}") from e
111
144
 
112
- # Check if the image is already in PNG format
113
- if image.format != "PNG":
114
- # Convert the image to PNG
115
- buffered = io.BytesIO()
116
- image.convert("RGB").save(buffered, format="PNG")
117
- base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
145
+ try:
146
+ with Image.open(BytesIO(image_bytes)) as img:
147
+ return img.format.upper()
148
+ except ImportError:
149
+ raise ImportError("Pillow library not available")
150
+ except Exception as e:
151
+ logger.error(f"Error detecting image format: {e}")
152
+ return "UNKNOWN"
118
153
 
154
+
155
+ def ensure_base64_format(base64_image: str, target_format: str = "PNG", **kwargs) -> str:
156
+ """
157
+ Ensures the given base64-encoded image is in the specified format. Converts if necessary.
158
+ Skips conversion if the image is already in the target format.
159
+
160
+ Parameters
161
+ ----------
162
+ base64_image : str
163
+ Base64-encoded image string.
164
+ target_format : str, optional
165
+ The target image format. Supported formats are "PNG" and "JPEG". Defaults to "PNG".
166
+ **kwargs
167
+ Additional keyword arguments passed to the format-specific encoding function.
168
+ For JPEG: quality (int, default=100) - JPEG quality (1-100).
169
+ For PNG: compression (int, default=3) - PNG compression level (0-9).
170
+
171
+ Returns
172
+ -------
173
+ str
174
+ Base64-encoded image string in the specified format.
175
+
176
+ Raises
177
+ ------
178
+ ValueError
179
+ If there is an error during format conversion.
180
+ """
181
+ target_format = target_format.upper()
182
+ if target_format == "JPG":
183
+ target_format = "JPEG"
184
+
185
+ current_format = _detect_base64_image_format(base64_image)
186
+ if current_format == "UNKNOWN":
187
+ raise ValueError(
188
+ f"Unable to decode image from base64 string: {base64_image}, because current format could not be detected."
189
+ )
190
+ if current_format == target_format:
191
+ logger.debug(f"Image already in {target_format} format, skipping conversion")
119
192
  return base64_image
193
+
194
+ try:
195
+ # Decode the base64 image using OpenCV (returns RGB format)
196
+ img_array = base64_to_numpy(base64_image)
197
+ # Re-encode in the target format
198
+ return numpy_to_base64(img_array, format=target_format, **kwargs)
199
+ except ImportError as e:
200
+ raise e
120
201
  except Exception as e:
121
- logger.error(f"Error ensuring PNG format: {e}")
122
- return None
202
+ logger.error(f"Error converting image to {target_format} format: {e}")
203
+ raise ValueError(f"Failed to convert image to {target_format} format: {e}") from e
123
204
 
124
205
 
125
206
  def pad_image(
@@ -128,6 +209,7 @@ def pad_image(
128
209
  target_height: int = DEFAULT_MAX_HEIGHT,
129
210
  background_color: int = 255,
130
211
  dtype=np.uint8,
212
+ how: str = "center",
131
213
  ) -> Tuple[np.ndarray, Tuple[int, int]]:
132
214
  """
133
215
  Pads a NumPy array representing an image to the specified target dimensions.
@@ -136,6 +218,8 @@ def pad_image(
136
218
  in that dimension. If the target dimensions are larger, the image will be centered within the
137
219
  canvas of the specified target size, with the remaining space filled with white padding.
138
220
 
221
+ The padding can be done around the center (how="center"), or to the bottom right (how="bottom_right").
222
+
139
223
  Parameters
140
224
  ----------
141
225
  array : np.ndarray
@@ -144,6 +228,8 @@ def pad_image(
144
228
  The desired target width of the padded image. Defaults to DEFAULT_MAX_WIDTH.
145
229
  target_height : int, optional
146
230
  The desired target height of the padded image. Defaults to DEFAULT_MAX_HEIGHT.
231
+ how : str, optional
232
+ The method to pad the image. Defaults to "center".
147
233
 
148
234
  Returns
149
235
  -------
@@ -168,17 +254,23 @@ def pad_image(
168
254
  """
169
255
  height, width = array.shape[:2]
170
256
 
171
- # Determine the padding needed, if any, while ensuring no padding is applied if the target is smaller
172
- pad_height = max((target_height - height) // 2, 0)
173
- pad_width = max((target_width - width) // 2, 0)
174
-
175
257
  # Determine final canvas size (may be equal to original if target is smaller)
176
258
  final_height = max(height, target_height)
177
259
  final_width = max(width, target_width)
178
260
 
179
261
  # Create the canvas and place the original image on it
180
262
  canvas = background_color * np.ones((final_height, final_width, array.shape[2]), dtype=dtype)
181
- canvas[pad_height : pad_height + height, pad_width : pad_width + width] = array # noqa: E203
263
+
264
+ # Determine the padding needed, if any, while ensuring no padding is applied if the target is smaller
265
+ if how == "center":
266
+ pad_height = max((target_height - height) // 2, 0)
267
+ pad_width = max((target_width - width) // 2, 0)
268
+
269
+ canvas[pad_height : pad_height + height, pad_width : pad_width + width] = array # noqa: E203
270
+ elif how == "bottom_right":
271
+ pad_height, pad_width = 0, 0
272
+
273
+ canvas[:height, :width] = array # noqa: E203
182
274
 
183
275
  return canvas, (pad_width, pad_height)
184
276
 
@@ -302,66 +394,193 @@ def normalize_image(
302
394
  return output_array
303
395
 
304
396
 
305
- def numpy_to_base64(array: np.ndarray) -> str:
397
+ def _preprocess_numpy_array(array: np.ndarray) -> np.ndarray:
398
+ """
399
+ Preprocesses a NumPy array for image encoding by ensuring proper format and data type.
400
+ Also handles color space conversion for OpenCV encoding.
401
+
402
+ Parameters
403
+ ----------
404
+ array : np.ndarray
405
+ The input image as a NumPy array.
406
+
407
+ Returns
408
+ -------
409
+ np.ndarray
410
+ The preprocessed array in uint8 format, ready for OpenCV encoding (BGR color order for color images).
411
+
412
+ Raises
413
+ ------
414
+ ValueError
415
+ If the input array cannot be converted into a valid image format.
416
+ """
417
+ # Check if the array is valid and can be converted to an image
418
+ try:
419
+ # If the array represents a grayscale image, drop the redundant axis in
420
+ # (h, w, 1). cv2 expects (h, w) for grayscale.
421
+ if array.ndim == 3 and array.shape[2] == 1:
422
+ array = np.squeeze(array, axis=2)
423
+
424
+ # Ensure uint8 data type
425
+ processed_array = array.astype(np.uint8)
426
+
427
+ # OpenCV uses BGR color order, so convert RGB to BGR if needed
428
+ if processed_array.ndim == 3 and processed_array.shape[2] == 3:
429
+ # Assume input is RGB and convert to BGR for OpenCV
430
+ processed_array = cv2.cvtColor(processed_array, cv2.COLOR_RGB2BGR)
431
+
432
+ return processed_array
433
+ except Exception as e:
434
+ raise ValueError(f"Failed to preprocess NumPy array for image encoding: {e}")
435
+
436
+
437
+ def _encode_opencv_jpeg(array: np.ndarray, *, quality: int = 100) -> bytes:
438
+ """NumPy array -> JPEG bytes using OpenCV."""
439
+ ok, buf = cv2.imencode(".jpg", array, [int(cv2.IMWRITE_JPEG_QUALITY), quality])
440
+ if not ok:
441
+ raise RuntimeError("cv2.imencode failed")
442
+ return buf.tobytes()
443
+
444
+
445
+ def _encode_opencv_png(array: np.ndarray, *, compression: int = 6) -> bytes:
446
+ """NumPy array -> PNG bytes using OpenCV"""
447
+ encode_params = [
448
+ cv2.IMWRITE_PNG_COMPRESSION,
449
+ compression,
450
+ cv2.IMWRITE_PNG_STRATEGY,
451
+ cv2.IMWRITE_PNG_STRATEGY_DEFAULT,
452
+ ]
453
+ ok, buf = cv2.imencode(".png", array, encode_params)
454
+ if not ok:
455
+ raise RuntimeError("cv2.imencode(.png) failed")
456
+ return buf.tobytes()
457
+
458
+
459
+ def numpy_to_base64_png(array: np.ndarray) -> str:
460
+ """
461
+ Converts a preprocessed NumPy array representing an image to a base64-encoded PNG string using OpenCV.
462
+
463
+ Parameters
464
+ ----------
465
+ array : np.ndarray
466
+ The preprocessed input image as a NumPy array. Must have a shape compatible with image data.
467
+
468
+ Returns
469
+ -------
470
+ str
471
+ The base64-encoded PNG string representation of the input NumPy array.
472
+
473
+ Raises
474
+ ------
475
+ RuntimeError
476
+ If there is an issue during the image conversion or base64 encoding process.
477
+ """
478
+ try:
479
+ # Encode to PNG bytes using OpenCV
480
+ png_bytes = _encode_opencv_png(array)
481
+
482
+ # Convert to base64
483
+ base64_img = bytetools.base64frombytes(png_bytes)
484
+ except Exception as e:
485
+ raise RuntimeError(f"Failed to encode image to base64 PNG: {e}")
486
+
487
+ return base64_img
488
+
489
+
490
+ def numpy_to_base64_jpeg(array: np.ndarray, quality: int = 100) -> str:
491
+ """
492
+ Converts a preprocessed NumPy array representing an image to a base64-encoded JPEG string using OpenCV.
493
+
494
+ Parameters
495
+ ----------
496
+ array : np.ndarray
497
+ The preprocessed input image as a NumPy array. Must have a shape compatible with image data.
498
+ quality : int, optional
499
+ JPEG quality (1-100), by default 100. Higher values mean better quality but larger file size.
500
+
501
+ Returns
502
+ -------
503
+ str
504
+ The base64-encoded JPEG string representation of the input NumPy array.
505
+
506
+ Raises
507
+ ------
508
+ RuntimeError
509
+ If there is an issue during the image conversion or base64 encoding process.
510
+ """
511
+ try:
512
+ # Encode to JPEG bytes using OpenCV
513
+ jpeg_bytes = _encode_opencv_jpeg(array, quality=quality)
514
+
515
+ # Convert to base64
516
+ base64_img = bytetools.base64frombytes(jpeg_bytes)
517
+ except Exception as e:
518
+ raise RuntimeError(f"Failed to encode image to base64 JPEG: {e}")
519
+
520
+ return base64_img
521
+
522
+
523
+ def numpy_to_base64(array: np.ndarray, format: str = "PNG", **kwargs) -> str:
306
524
  """
307
525
  Converts a NumPy array representing an image to a base64-encoded string.
308
526
 
309
- The function takes a NumPy array, converts it to a PIL image, and then encodes
310
- the image as a PNG in a base64 string format. The input array is expected to be in
311
- a format that can be converted to a valid image, such as having a shape of (H, W, C)
312
- where C is the number of channels (e.g., 3 for RGB).
527
+ The function takes a NumPy array, preprocesses it, and then encodes
528
+ the image in the specified format as a base64 string. The input array is expected
529
+ to be in a format that can be converted to a valid image, such as having a shape
530
+ of (H, W, C) where C is the number of channels (e.g., 3 for RGB).
313
531
 
314
532
  Parameters
315
533
  ----------
316
534
  array : np.ndarray
317
535
  The input image as a NumPy array. Must have a shape compatible with image data.
536
+ format : str, optional
537
+ The image format to use for encoding. Supported formats are "PNG" and "JPEG".
538
+ Defaults to "PNG".
539
+ **kwargs
540
+ Additional keyword arguments passed to the format-specific encoding function.
541
+ For JPEG: quality (int, default=100) - JPEG quality (1-100).
318
542
 
319
543
  Returns
320
544
  -------
321
545
  str
322
- The base64-encoded string representation of the input NumPy array as a PNG image.
546
+ The base64-encoded string representation of the input NumPy array in the specified format.
323
547
 
324
548
  Raises
325
549
  ------
326
550
  ValueError
327
- If the input array cannot be converted into a valid image format.
551
+ If the input array cannot be converted into a valid image format, or if an
552
+ unsupported format is specified.
328
553
  RuntimeError
329
554
  If there is an issue during the image conversion or base64 encoding process.
330
555
 
331
556
  Examples
332
557
  --------
333
558
  >>> array = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
334
- >>> encoded_str = numpy_to_base64(array)
559
+ >>> encoded_str = numpy_to_base64(array, format="PNG")
335
560
  >>> isinstance(encoded_str, str)
336
561
  True
562
+ >>> encoded_str_jpeg = numpy_to_base64(array, format="JPEG", quality=90)
563
+ >>> isinstance(encoded_str_jpeg, str)
564
+ True
337
565
  """
338
- # If the array represents a grayscale image, drop the redundant axis in
339
- # (h, w, 1). PIL.Image.fromarray() expects an array of form (h, w) if it's
340
- # a grayscale image.
341
- if array.ndim == 3 and array.shape[2] == 1:
342
- array = np.squeeze(array, axis=2)
566
+ # Centralized preprocessing of the numpy array
567
+ processed_array = _preprocess_numpy_array(array)
343
568
 
344
- # Check if the array is valid and can be converted to an image
345
- try:
346
- # Convert the NumPy array to a PIL image
347
- pil_image = Image.fromarray(array.astype(np.uint8))
348
- except Exception as e:
349
- raise ValueError(f"Failed to convert NumPy array to image: {e}")
569
+ format = format.upper()
350
570
 
351
- try:
352
- # Convert the PIL image to a base64-encoded string
353
- with BytesIO() as buffer:
354
- pil_image.save(buffer, format="PNG")
355
- base64_img = bytetools.base64frombytes(buffer.getvalue())
356
- except Exception as e:
357
- raise RuntimeError(f"Failed to encode image to base64: {e}")
358
-
359
- return base64_img
571
+ if format == "PNG":
572
+ return numpy_to_base64_png(processed_array)
573
+ elif format == "JPEG" or format == "JPG":
574
+ quality = kwargs.get("quality", 100)
575
+ return numpy_to_base64_jpeg(processed_array, quality=quality)
576
+ else:
577
+ raise ValueError(f"Unsupported format: {format}. Supported formats are 'PNG' and 'JPEG'.")
360
578
 
361
579
 
362
580
  def base64_to_numpy(base64_string: str) -> np.ndarray:
363
581
  """
364
- Convert a base64-encoded image string to a NumPy array.
582
+ Convert a base64-encoded image string to a NumPy array using OpenCV.
583
+ Returns images in RGB format for consistency.
365
584
 
366
585
  Parameters
367
586
  ----------
@@ -371,37 +590,82 @@ def base64_to_numpy(base64_string: str) -> np.ndarray:
371
590
  Returns
372
591
  -------
373
592
  numpy.ndarray
374
- NumPy array representation of the decoded image.
593
+ NumPy array representation of the decoded image in RGB format (for color images).
594
+ Grayscale images are returned as-is.
375
595
 
376
596
  Raises
377
597
  ------
378
598
  ValueError
379
599
  If the base64 string is invalid or cannot be decoded into an image.
380
- ImportError
381
- If required libraries are not installed.
382
600
 
383
601
  Examples
384
602
  --------
385
603
  >>> base64_str = '/9j/4AAQSkZJRgABAQAAAQABAAD/2wBD...'
386
604
  >>> img_array = base64_to_numpy(base64_str)
605
+ >>> # img_array is now in RGB format (for color images)
387
606
  """
388
607
  try:
389
- # Decode the base64 string
390
- image_data = base64.b64decode(base64_string)
391
- except (base64.binascii.Error, ValueError) as e:
608
+ # Decode the base64 string to bytes using bytetools
609
+ image_bytes = bytetools.bytesfrombase64(base64_string)
610
+ except Exception as e:
392
611
  raise ValueError("Invalid base64 string") from e
393
612
 
613
+ # Create numpy buffer from bytes and decode using OpenCV
614
+ buf = np.frombuffer(image_bytes, dtype=np.uint8)
394
615
  try:
395
- # Convert the bytes into a BytesIO object
396
- image_bytes = BytesIO(image_data)
397
-
398
- # Open the image using PIL
399
- image = Image.open(image_bytes)
400
- image.load()
401
- except UnidentifiedImageError as e:
616
+ img = cv2.imdecode(buf, cv2.IMREAD_UNCHANGED)
617
+ if img is None:
618
+ raise ValueError("OpenCV failed to decode image")
619
+
620
+ # Convert BGR to RGB for consistent processing (OpenCV loads as BGR)
621
+ # Only convert if it's a 3-channel color image
622
+ if img.ndim == 3 and img.shape[2] == 3:
623
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
624
+ except ImportError:
625
+ raise
626
+ except Exception as e:
402
627
  raise ValueError("Unable to decode image from base64 string") from e
403
628
 
404
- # Convert the image to a NumPy array
405
- image_array = np.array(image)
629
+ # Convert to numpy array
630
+ img = np.array(img)
631
+ # Assert that 3-channel images are in RGB format after conversion
632
+ assert img.ndim <= 3, f"Image has unexpected number of dimensions: {img.ndim}"
633
+ assert img.ndim != 3 or img.shape[2] == 3, f"3-channel image should have 3 channels, got: {img.shape[2]}"
634
+
635
+ return img
636
+
637
+
638
+ def scale_numpy_image(
639
+ img_arr: np.ndarray, scale_tuple: Optional[Tuple[int, int]] = None, interpolation=Image.LANCZOS
640
+ ) -> np.ndarray:
641
+ """
642
+ Scales a NumPy image array using OpenCV with aspect ratio preservation.
643
+
644
+ This function provides OpenCV-based image scaling that mimics PIL's thumbnail behavior
645
+ by maintaining aspect ratio and scaling to fit within the specified dimensions.
646
+
647
+ Parameters
648
+ ----------
649
+ img_arr : np.ndarray
650
+ The input image as a NumPy array.
651
+ scale_tuple : Optional[Tuple[int, int]], optional
652
+ A tuple (width, height) to resize the image to. If provided, the image
653
+ will be resized to fit within these dimensions while maintaining aspect ratio
654
+ (similar to PIL's thumbnail method). Defaults to None.
655
+ interpolation : int, optional
656
+ OpenCV interpolation method. Defaults to cv2.INTER_LANCZOS4.
406
657
 
407
- return image_array
658
+ Returns
659
+ -------
660
+ np.ndarray
661
+ A NumPy array representing the scaled image data.
662
+ """
663
+ # Apply scaling using OpenCV if specified
664
+ # Using PIL for scaling as CV2 seems to lead to different results
665
+ # TODO: Remove when we move to YOLOX Ensemble Models
666
+ if scale_tuple:
667
+ image = Image.fromarray(img_arr)
668
+ image.thumbnail(scale_tuple, interpolation)
669
+ img_arr = np.array(image)
670
+ # Ensure we return a copy
671
+ return img_arr.copy()
@@ -250,7 +250,7 @@ class SimpleMessageBrokerHandler(socketserver.BaseRequestHandler):
250
250
  with queue_lock:
251
251
  if queue.empty():
252
252
  # Return failure response immediately
253
- response = ResponseSchema(response_code=1, response_reason="Queue is empty")
253
+ response = ResponseSchema(response_code=2, response_reason="Job not ready")
254
254
  self._send_response(response)
255
255
  return
256
256
  # Pop the message from the queue