nv-ingest-api 25.7.7.dev20250707__py3-none-any.whl → 25.8.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. Click here for more details.
- nv_ingest_api/interface/extract.py +18 -18
- nv_ingest_api/internal/enums/common.py +6 -0
- nv_ingest_api/internal/extract/image/chart_extractor.py +80 -75
- nv_ingest_api/internal/extract/image/image_helpers/common.py +5 -6
- nv_ingest_api/internal/extract/image/infographic_extractor.py +59 -35
- nv_ingest_api/internal/extract/image/table_extractor.py +84 -64
- nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +9 -8
- nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +32 -20
- nv_ingest_api/internal/extract/pdf/engines/pdfium.py +40 -29
- nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +59 -0
- nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +1 -0
- nv_ingest_api/internal/primitives/nim/model_interface/{paddle.py → ocr.py} +132 -39
- nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +44 -236
- nv_ingest_api/internal/primitives/nim/nim_client.py +61 -18
- nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +6 -6
- nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +6 -6
- nv_ingest_api/internal/schemas/extract/extract_table_schema.py +5 -5
- nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +5 -0
- nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +1 -1
- nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +4 -0
- nv_ingest_api/internal/transform/embed_text.py +105 -12
- nv_ingest_api/internal/transform/split_text.py +13 -8
- nv_ingest_api/util/image_processing/table_and_chart.py +97 -42
- nv_ingest_api/util/image_processing/transforms.py +351 -87
- nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +1 -1
- nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +51 -48
- nv_ingest_api/util/metadata/aggregators.py +4 -1
- nv_ingest_api/util/pdf/pdfium.py +6 -14
- {nv_ingest_api-25.7.7.dev20250707.dist-info → nv_ingest_api-25.8.0rc2.dist-info}/METADATA +2 -1
- {nv_ingest_api-25.7.7.dev20250707.dist-info → nv_ingest_api-25.8.0rc2.dist-info}/RECORD +33 -33
- {nv_ingest_api-25.7.7.dev20250707.dist-info → nv_ingest_api-25.8.0rc2.dist-info}/WHEEL +0 -0
- {nv_ingest_api-25.7.7.dev20250707.dist-info → nv_ingest_api-25.8.0rc2.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-25.7.7.dev20250707.dist-info → nv_ingest_api-25.8.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -2,29 +2,55 @@
|
|
|
2
2
|
# All rights reserved.
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
|
-
import base64
|
|
6
|
-
import io
|
|
7
5
|
import logging
|
|
8
|
-
from io import BytesIO
|
|
9
6
|
from math import ceil
|
|
10
7
|
from math import floor
|
|
11
8
|
from typing import Optional
|
|
12
9
|
from typing import Tuple
|
|
13
10
|
|
|
11
|
+
import cv2
|
|
14
12
|
import numpy as np
|
|
13
|
+
from io import BytesIO
|
|
15
14
|
from PIL import Image
|
|
16
|
-
from PIL import UnidentifiedImageError
|
|
17
15
|
|
|
18
16
|
from nv_ingest_api.util.converters import bytetools
|
|
19
17
|
|
|
18
|
+
# Configure OpenCV to use a single thread for image processing
|
|
19
|
+
cv2.setNumThreads(1)
|
|
20
20
|
DEFAULT_MAX_WIDTH = 1024
|
|
21
21
|
DEFAULT_MAX_HEIGHT = 1280
|
|
22
22
|
|
|
23
|
+
# Workaround for PIL.Image.DecompressionBombError
|
|
24
|
+
Image.MAX_IMAGE_PIXELS = None
|
|
25
|
+
|
|
23
26
|
logger = logging.getLogger(__name__)
|
|
24
27
|
|
|
25
28
|
|
|
29
|
+
def _resize_image_opencv(
|
|
30
|
+
array: np.ndarray, target_size: Tuple[int, int], interpolation=cv2.INTER_LANCZOS4
|
|
31
|
+
) -> np.ndarray:
|
|
32
|
+
"""
|
|
33
|
+
Resizes a NumPy array representing an image using OpenCV.
|
|
34
|
+
|
|
35
|
+
Parameters
|
|
36
|
+
----------
|
|
37
|
+
array : np.ndarray
|
|
38
|
+
The input image as a NumPy array.
|
|
39
|
+
target_size : Tuple[int, int]
|
|
40
|
+
The target size as (width, height).
|
|
41
|
+
interpolation : int, optional
|
|
42
|
+
OpenCV interpolation method. Defaults to cv2.INTER_LANCZOS4.
|
|
43
|
+
|
|
44
|
+
Returns
|
|
45
|
+
-------
|
|
46
|
+
np.ndarray
|
|
47
|
+
The resized image as a NumPy array.
|
|
48
|
+
"""
|
|
49
|
+
return cv2.resize(array, target_size, interpolation=interpolation)
|
|
50
|
+
|
|
51
|
+
|
|
26
52
|
def scale_image_to_encoding_size(
|
|
27
|
-
base64_image: str, max_base64_size: int = 180_000, initial_reduction: float = 0.9
|
|
53
|
+
base64_image: str, max_base64_size: int = 180_000, initial_reduction: float = 0.9, format: str = "PNG", **kwargs
|
|
28
54
|
) -> Tuple[str, Tuple[int, int]]:
|
|
29
55
|
"""
|
|
30
56
|
Decodes a base64-encoded image, resizes it if needed, and re-encodes it as base64.
|
|
@@ -38,12 +64,19 @@ def scale_image_to_encoding_size(
|
|
|
38
64
|
Maximum allowable size for the base64-encoded image, by default 180,000 characters.
|
|
39
65
|
initial_reduction : float, optional
|
|
40
66
|
Initial reduction step for resizing, by default 0.9.
|
|
67
|
+
format : str, optional
|
|
68
|
+
The image format to use for encoding. Supported formats are "PNG" and "JPEG".
|
|
69
|
+
Defaults to "PNG".
|
|
70
|
+
**kwargs
|
|
71
|
+
Additional keyword arguments passed to the format-specific encoding function.
|
|
72
|
+
For JPEG: quality (int, default=100) - JPEG quality (1-100).
|
|
73
|
+
For PNG: compression (int, default=3) - PNG compression level (0-9).
|
|
41
74
|
|
|
42
75
|
Returns
|
|
43
76
|
-------
|
|
44
77
|
Tuple[str, Tuple[int, int]]
|
|
45
78
|
A tuple containing:
|
|
46
|
-
- Base64-encoded
|
|
79
|
+
- Base64-encoded image string in the specified format, resized if necessary.
|
|
47
80
|
- The new size as a tuple (width, height).
|
|
48
81
|
|
|
49
82
|
Raises
|
|
@@ -52,12 +85,11 @@ def scale_image_to_encoding_size(
|
|
|
52
85
|
If the image cannot be resized below the specified max_base64_size.
|
|
53
86
|
"""
|
|
54
87
|
try:
|
|
55
|
-
# Decode the base64 image
|
|
56
|
-
|
|
57
|
-
img = Image.open(io.BytesIO(image_data)).convert("RGB")
|
|
88
|
+
# Decode the base64 image using OpenCV (returns RGB format)
|
|
89
|
+
img_array = base64_to_numpy(base64_image)
|
|
58
90
|
|
|
59
|
-
# Initial image size
|
|
60
|
-
original_size =
|
|
91
|
+
# Initial image size (height, width, channels) -> (width, height)
|
|
92
|
+
original_size = (img_array.shape[1], img_array.shape[0])
|
|
61
93
|
|
|
62
94
|
# Check initial size
|
|
63
95
|
if len(base64_image) <= max_base64_size:
|
|
@@ -66,23 +98,24 @@ def scale_image_to_encoding_size(
|
|
|
66
98
|
# Initial reduction step
|
|
67
99
|
reduction_step = initial_reduction
|
|
68
100
|
new_size = original_size
|
|
101
|
+
current_img = img_array.copy()
|
|
102
|
+
original_width, original_height = original_size
|
|
103
|
+
|
|
69
104
|
while len(base64_image) > max_base64_size:
|
|
70
|
-
|
|
71
|
-
new_size
|
|
105
|
+
new_size = (int(original_width * reduction_step), int(original_height * reduction_step))
|
|
106
|
+
if new_size[0] < 1 or new_size[1] < 1:
|
|
107
|
+
raise ValueError("Image cannot be resized further without becoming too small.")
|
|
108
|
+
|
|
109
|
+
# Resize the image using OpenCV
|
|
110
|
+
current_img = _resize_image_opencv(img_array, new_size)
|
|
72
111
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
img_resized.save(buffered, format="PNG")
|
|
76
|
-
base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
|
|
112
|
+
# Re-encode as base64 using the specified format
|
|
113
|
+
base64_image = numpy_to_base64(current_img, format=format, **kwargs)
|
|
77
114
|
|
|
78
115
|
# Adjust the reduction step if necessary
|
|
79
116
|
if len(base64_image) > max_base64_size:
|
|
80
117
|
reduction_step *= 0.95 # Reduce size further if needed
|
|
81
118
|
|
|
82
|
-
# Safety check
|
|
83
|
-
if new_size[0] < 1 or new_size[1] < 1:
|
|
84
|
-
raise Exception("Image cannot be resized further without becoming too small.")
|
|
85
|
-
|
|
86
119
|
return base64_image, new_size
|
|
87
120
|
|
|
88
121
|
except Exception as e:
|
|
@@ -90,36 +123,84 @@ def scale_image_to_encoding_size(
|
|
|
90
123
|
raise
|
|
91
124
|
|
|
92
125
|
|
|
93
|
-
def
|
|
126
|
+
def _detect_base64_image_format(base64_string: str) -> Optional[str]:
|
|
94
127
|
"""
|
|
95
|
-
|
|
128
|
+
Detects the format of a base64-encoded image using Pillow.
|
|
96
129
|
|
|
97
130
|
Parameters
|
|
98
131
|
----------
|
|
99
|
-
|
|
132
|
+
base64_string : str
|
|
100
133
|
Base64-encoded image string.
|
|
101
134
|
|
|
102
135
|
Returns
|
|
103
136
|
-------
|
|
104
|
-
|
|
105
|
-
Base64-encoded PNG image string.
|
|
137
|
+
The detected format ("PNG", "JPEG", "UNKNOWN")
|
|
106
138
|
"""
|
|
107
139
|
try:
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
140
|
+
image_bytes = bytetools.bytesfrombase64(base64_string)
|
|
141
|
+
except Exception as e:
|
|
142
|
+
logger.error(f"Invalid base64 string: {e}")
|
|
143
|
+
raise ValueError(f"Invalid base64 string: {e}") from e
|
|
111
144
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
145
|
+
try:
|
|
146
|
+
with Image.open(BytesIO(image_bytes)) as img:
|
|
147
|
+
return img.format.upper()
|
|
148
|
+
except ImportError:
|
|
149
|
+
raise ImportError("Pillow library not available")
|
|
150
|
+
except Exception as e:
|
|
151
|
+
logger.error(f"Error detecting image format: {e}")
|
|
152
|
+
return "UNKNOWN"
|
|
118
153
|
|
|
154
|
+
|
|
155
|
+
def ensure_base64_format(base64_image: str, target_format: str = "PNG", **kwargs) -> str:
|
|
156
|
+
"""
|
|
157
|
+
Ensures the given base64-encoded image is in the specified format. Converts if necessary.
|
|
158
|
+
Skips conversion if the image is already in the target format.
|
|
159
|
+
|
|
160
|
+
Parameters
|
|
161
|
+
----------
|
|
162
|
+
base64_image : str
|
|
163
|
+
Base64-encoded image string.
|
|
164
|
+
target_format : str, optional
|
|
165
|
+
The target image format. Supported formats are "PNG" and "JPEG". Defaults to "PNG".
|
|
166
|
+
**kwargs
|
|
167
|
+
Additional keyword arguments passed to the format-specific encoding function.
|
|
168
|
+
For JPEG: quality (int, default=100) - JPEG quality (1-100).
|
|
169
|
+
For PNG: compression (int, default=3) - PNG compression level (0-9).
|
|
170
|
+
|
|
171
|
+
Returns
|
|
172
|
+
-------
|
|
173
|
+
str
|
|
174
|
+
Base64-encoded image string in the specified format.
|
|
175
|
+
|
|
176
|
+
Raises
|
|
177
|
+
------
|
|
178
|
+
ValueError
|
|
179
|
+
If there is an error during format conversion.
|
|
180
|
+
"""
|
|
181
|
+
target_format = target_format.upper()
|
|
182
|
+
if target_format == "JPG":
|
|
183
|
+
target_format = "JPEG"
|
|
184
|
+
|
|
185
|
+
current_format = _detect_base64_image_format(base64_image)
|
|
186
|
+
if current_format == "UNKNOWN":
|
|
187
|
+
raise ValueError(
|
|
188
|
+
f"Unable to decode image from base64 string: {base64_image}, because current format could not be detected."
|
|
189
|
+
)
|
|
190
|
+
if current_format == target_format:
|
|
191
|
+
logger.debug(f"Image already in {target_format} format, skipping conversion")
|
|
119
192
|
return base64_image
|
|
193
|
+
|
|
194
|
+
try:
|
|
195
|
+
# Decode the base64 image using OpenCV (returns RGB format)
|
|
196
|
+
img_array = base64_to_numpy(base64_image)
|
|
197
|
+
# Re-encode in the target format
|
|
198
|
+
return numpy_to_base64(img_array, format=target_format, **kwargs)
|
|
199
|
+
except ImportError as e:
|
|
200
|
+
raise e
|
|
120
201
|
except Exception as e:
|
|
121
|
-
logger.error(f"Error
|
|
122
|
-
|
|
202
|
+
logger.error(f"Error converting image to {target_format} format: {e}")
|
|
203
|
+
raise ValueError(f"Failed to convert image to {target_format} format: {e}") from e
|
|
123
204
|
|
|
124
205
|
|
|
125
206
|
def pad_image(
|
|
@@ -128,6 +209,7 @@ def pad_image(
|
|
|
128
209
|
target_height: int = DEFAULT_MAX_HEIGHT,
|
|
129
210
|
background_color: int = 255,
|
|
130
211
|
dtype=np.uint8,
|
|
212
|
+
how: str = "center",
|
|
131
213
|
) -> Tuple[np.ndarray, Tuple[int, int]]:
|
|
132
214
|
"""
|
|
133
215
|
Pads a NumPy array representing an image to the specified target dimensions.
|
|
@@ -136,6 +218,8 @@ def pad_image(
|
|
|
136
218
|
in that dimension. If the target dimensions are larger, the image will be centered within the
|
|
137
219
|
canvas of the specified target size, with the remaining space filled with white padding.
|
|
138
220
|
|
|
221
|
+
The padding can be done around the center (how="center"), or to the bottom right (how="bottom_right").
|
|
222
|
+
|
|
139
223
|
Parameters
|
|
140
224
|
----------
|
|
141
225
|
array : np.ndarray
|
|
@@ -144,6 +228,8 @@ def pad_image(
|
|
|
144
228
|
The desired target width of the padded image. Defaults to DEFAULT_MAX_WIDTH.
|
|
145
229
|
target_height : int, optional
|
|
146
230
|
The desired target height of the padded image. Defaults to DEFAULT_MAX_HEIGHT.
|
|
231
|
+
how : str, optional
|
|
232
|
+
The method to pad the image. Defaults to "center".
|
|
147
233
|
|
|
148
234
|
Returns
|
|
149
235
|
-------
|
|
@@ -168,17 +254,23 @@ def pad_image(
|
|
|
168
254
|
"""
|
|
169
255
|
height, width = array.shape[:2]
|
|
170
256
|
|
|
171
|
-
# Determine the padding needed, if any, while ensuring no padding is applied if the target is smaller
|
|
172
|
-
pad_height = max((target_height - height) // 2, 0)
|
|
173
|
-
pad_width = max((target_width - width) // 2, 0)
|
|
174
|
-
|
|
175
257
|
# Determine final canvas size (may be equal to original if target is smaller)
|
|
176
258
|
final_height = max(height, target_height)
|
|
177
259
|
final_width = max(width, target_width)
|
|
178
260
|
|
|
179
261
|
# Create the canvas and place the original image on it
|
|
180
262
|
canvas = background_color * np.ones((final_height, final_width, array.shape[2]), dtype=dtype)
|
|
181
|
-
|
|
263
|
+
|
|
264
|
+
# Determine the padding needed, if any, while ensuring no padding is applied if the target is smaller
|
|
265
|
+
if how == "center":
|
|
266
|
+
pad_height = max((target_height - height) // 2, 0)
|
|
267
|
+
pad_width = max((target_width - width) // 2, 0)
|
|
268
|
+
|
|
269
|
+
canvas[pad_height : pad_height + height, pad_width : pad_width + width] = array # noqa: E203
|
|
270
|
+
elif how == "bottom_right":
|
|
271
|
+
pad_height, pad_width = 0, 0
|
|
272
|
+
|
|
273
|
+
canvas[:height, :width] = array # noqa: E203
|
|
182
274
|
|
|
183
275
|
return canvas, (pad_width, pad_height)
|
|
184
276
|
|
|
@@ -302,66 +394,193 @@ def normalize_image(
|
|
|
302
394
|
return output_array
|
|
303
395
|
|
|
304
396
|
|
|
305
|
-
def
|
|
397
|
+
def _preprocess_numpy_array(array: np.ndarray) -> np.ndarray:
|
|
398
|
+
"""
|
|
399
|
+
Preprocesses a NumPy array for image encoding by ensuring proper format and data type.
|
|
400
|
+
Also handles color space conversion for OpenCV encoding.
|
|
401
|
+
|
|
402
|
+
Parameters
|
|
403
|
+
----------
|
|
404
|
+
array : np.ndarray
|
|
405
|
+
The input image as a NumPy array.
|
|
406
|
+
|
|
407
|
+
Returns
|
|
408
|
+
-------
|
|
409
|
+
np.ndarray
|
|
410
|
+
The preprocessed array in uint8 format, ready for OpenCV encoding (BGR color order for color images).
|
|
411
|
+
|
|
412
|
+
Raises
|
|
413
|
+
------
|
|
414
|
+
ValueError
|
|
415
|
+
If the input array cannot be converted into a valid image format.
|
|
416
|
+
"""
|
|
417
|
+
# Check if the array is valid and can be converted to an image
|
|
418
|
+
try:
|
|
419
|
+
# If the array represents a grayscale image, drop the redundant axis in
|
|
420
|
+
# (h, w, 1). cv2 expects (h, w) for grayscale.
|
|
421
|
+
if array.ndim == 3 and array.shape[2] == 1:
|
|
422
|
+
array = np.squeeze(array, axis=2)
|
|
423
|
+
|
|
424
|
+
# Ensure uint8 data type
|
|
425
|
+
processed_array = array.astype(np.uint8)
|
|
426
|
+
|
|
427
|
+
# OpenCV uses BGR color order, so convert RGB to BGR if needed
|
|
428
|
+
if processed_array.ndim == 3 and processed_array.shape[2] == 3:
|
|
429
|
+
# Assume input is RGB and convert to BGR for OpenCV
|
|
430
|
+
processed_array = cv2.cvtColor(processed_array, cv2.COLOR_RGB2BGR)
|
|
431
|
+
|
|
432
|
+
return processed_array
|
|
433
|
+
except Exception as e:
|
|
434
|
+
raise ValueError(f"Failed to preprocess NumPy array for image encoding: {e}")
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
def _encode_opencv_jpeg(array: np.ndarray, *, quality: int = 100) -> bytes:
|
|
438
|
+
"""NumPy array -> JPEG bytes using OpenCV."""
|
|
439
|
+
ok, buf = cv2.imencode(".jpg", array, [int(cv2.IMWRITE_JPEG_QUALITY), quality])
|
|
440
|
+
if not ok:
|
|
441
|
+
raise RuntimeError("cv2.imencode failed")
|
|
442
|
+
return buf.tobytes()
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def _encode_opencv_png(array: np.ndarray, *, compression: int = 6) -> bytes:
|
|
446
|
+
"""NumPy array -> PNG bytes using OpenCV"""
|
|
447
|
+
encode_params = [
|
|
448
|
+
cv2.IMWRITE_PNG_COMPRESSION,
|
|
449
|
+
compression,
|
|
450
|
+
cv2.IMWRITE_PNG_STRATEGY,
|
|
451
|
+
cv2.IMWRITE_PNG_STRATEGY_DEFAULT,
|
|
452
|
+
]
|
|
453
|
+
ok, buf = cv2.imencode(".png", array, encode_params)
|
|
454
|
+
if not ok:
|
|
455
|
+
raise RuntimeError("cv2.imencode(.png) failed")
|
|
456
|
+
return buf.tobytes()
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
def numpy_to_base64_png(array: np.ndarray) -> str:
|
|
460
|
+
"""
|
|
461
|
+
Converts a preprocessed NumPy array representing an image to a base64-encoded PNG string using OpenCV.
|
|
462
|
+
|
|
463
|
+
Parameters
|
|
464
|
+
----------
|
|
465
|
+
array : np.ndarray
|
|
466
|
+
The preprocessed input image as a NumPy array. Must have a shape compatible with image data.
|
|
467
|
+
|
|
468
|
+
Returns
|
|
469
|
+
-------
|
|
470
|
+
str
|
|
471
|
+
The base64-encoded PNG string representation of the input NumPy array.
|
|
472
|
+
|
|
473
|
+
Raises
|
|
474
|
+
------
|
|
475
|
+
RuntimeError
|
|
476
|
+
If there is an issue during the image conversion or base64 encoding process.
|
|
477
|
+
"""
|
|
478
|
+
try:
|
|
479
|
+
# Encode to PNG bytes using OpenCV
|
|
480
|
+
png_bytes = _encode_opencv_png(array)
|
|
481
|
+
|
|
482
|
+
# Convert to base64
|
|
483
|
+
base64_img = bytetools.base64frombytes(png_bytes)
|
|
484
|
+
except Exception as e:
|
|
485
|
+
raise RuntimeError(f"Failed to encode image to base64 PNG: {e}")
|
|
486
|
+
|
|
487
|
+
return base64_img
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def numpy_to_base64_jpeg(array: np.ndarray, quality: int = 100) -> str:
|
|
491
|
+
"""
|
|
492
|
+
Converts a preprocessed NumPy array representing an image to a base64-encoded JPEG string using OpenCV.
|
|
493
|
+
|
|
494
|
+
Parameters
|
|
495
|
+
----------
|
|
496
|
+
array : np.ndarray
|
|
497
|
+
The preprocessed input image as a NumPy array. Must have a shape compatible with image data.
|
|
498
|
+
quality : int, optional
|
|
499
|
+
JPEG quality (1-100), by default 100. Higher values mean better quality but larger file size.
|
|
500
|
+
|
|
501
|
+
Returns
|
|
502
|
+
-------
|
|
503
|
+
str
|
|
504
|
+
The base64-encoded JPEG string representation of the input NumPy array.
|
|
505
|
+
|
|
506
|
+
Raises
|
|
507
|
+
------
|
|
508
|
+
RuntimeError
|
|
509
|
+
If there is an issue during the image conversion or base64 encoding process.
|
|
510
|
+
"""
|
|
511
|
+
try:
|
|
512
|
+
# Encode to JPEG bytes using OpenCV
|
|
513
|
+
jpeg_bytes = _encode_opencv_jpeg(array, quality=quality)
|
|
514
|
+
|
|
515
|
+
# Convert to base64
|
|
516
|
+
base64_img = bytetools.base64frombytes(jpeg_bytes)
|
|
517
|
+
except Exception as e:
|
|
518
|
+
raise RuntimeError(f"Failed to encode image to base64 JPEG: {e}")
|
|
519
|
+
|
|
520
|
+
return base64_img
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
def numpy_to_base64(array: np.ndarray, format: str = "PNG", **kwargs) -> str:
|
|
306
524
|
"""
|
|
307
525
|
Converts a NumPy array representing an image to a base64-encoded string.
|
|
308
526
|
|
|
309
|
-
The function takes a NumPy array,
|
|
310
|
-
the image
|
|
311
|
-
a format that can be converted to a valid image, such as having a shape
|
|
312
|
-
where C is the number of channels (e.g., 3 for RGB).
|
|
527
|
+
The function takes a NumPy array, preprocesses it, and then encodes
|
|
528
|
+
the image in the specified format as a base64 string. The input array is expected
|
|
529
|
+
to be in a format that can be converted to a valid image, such as having a shape
|
|
530
|
+
of (H, W, C) where C is the number of channels (e.g., 3 for RGB).
|
|
313
531
|
|
|
314
532
|
Parameters
|
|
315
533
|
----------
|
|
316
534
|
array : np.ndarray
|
|
317
535
|
The input image as a NumPy array. Must have a shape compatible with image data.
|
|
536
|
+
format : str, optional
|
|
537
|
+
The image format to use for encoding. Supported formats are "PNG" and "JPEG".
|
|
538
|
+
Defaults to "PNG".
|
|
539
|
+
**kwargs
|
|
540
|
+
Additional keyword arguments passed to the format-specific encoding function.
|
|
541
|
+
For JPEG: quality (int, default=100) - JPEG quality (1-100).
|
|
318
542
|
|
|
319
543
|
Returns
|
|
320
544
|
-------
|
|
321
545
|
str
|
|
322
|
-
The base64-encoded string representation of the input NumPy array
|
|
546
|
+
The base64-encoded string representation of the input NumPy array in the specified format.
|
|
323
547
|
|
|
324
548
|
Raises
|
|
325
549
|
------
|
|
326
550
|
ValueError
|
|
327
|
-
If the input array cannot be converted into a valid image format
|
|
551
|
+
If the input array cannot be converted into a valid image format, or if an
|
|
552
|
+
unsupported format is specified.
|
|
328
553
|
RuntimeError
|
|
329
554
|
If there is an issue during the image conversion or base64 encoding process.
|
|
330
555
|
|
|
331
556
|
Examples
|
|
332
557
|
--------
|
|
333
558
|
>>> array = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
|
|
334
|
-
>>> encoded_str = numpy_to_base64(array)
|
|
559
|
+
>>> encoded_str = numpy_to_base64(array, format="PNG")
|
|
335
560
|
>>> isinstance(encoded_str, str)
|
|
336
561
|
True
|
|
562
|
+
>>> encoded_str_jpeg = numpy_to_base64(array, format="JPEG", quality=90)
|
|
563
|
+
>>> isinstance(encoded_str_jpeg, str)
|
|
564
|
+
True
|
|
337
565
|
"""
|
|
338
|
-
#
|
|
339
|
-
|
|
340
|
-
# a grayscale image.
|
|
341
|
-
if array.ndim == 3 and array.shape[2] == 1:
|
|
342
|
-
array = np.squeeze(array, axis=2)
|
|
566
|
+
# Centralized preprocessing of the numpy array
|
|
567
|
+
processed_array = _preprocess_numpy_array(array)
|
|
343
568
|
|
|
344
|
-
|
|
345
|
-
try:
|
|
346
|
-
# Convert the NumPy array to a PIL image
|
|
347
|
-
pil_image = Image.fromarray(array.astype(np.uint8))
|
|
348
|
-
except Exception as e:
|
|
349
|
-
raise ValueError(f"Failed to convert NumPy array to image: {e}")
|
|
569
|
+
format = format.upper()
|
|
350
570
|
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
raise
|
|
358
|
-
|
|
359
|
-
return base64_img
|
|
571
|
+
if format == "PNG":
|
|
572
|
+
return numpy_to_base64_png(processed_array)
|
|
573
|
+
elif format == "JPEG" or format == "JPG":
|
|
574
|
+
quality = kwargs.get("quality", 100)
|
|
575
|
+
return numpy_to_base64_jpeg(processed_array, quality=quality)
|
|
576
|
+
else:
|
|
577
|
+
raise ValueError(f"Unsupported format: {format}. Supported formats are 'PNG' and 'JPEG'.")
|
|
360
578
|
|
|
361
579
|
|
|
362
580
|
def base64_to_numpy(base64_string: str) -> np.ndarray:
|
|
363
581
|
"""
|
|
364
|
-
Convert a base64-encoded image string to a NumPy array.
|
|
582
|
+
Convert a base64-encoded image string to a NumPy array using OpenCV.
|
|
583
|
+
Returns images in RGB format for consistency.
|
|
365
584
|
|
|
366
585
|
Parameters
|
|
367
586
|
----------
|
|
@@ -371,37 +590,82 @@ def base64_to_numpy(base64_string: str) -> np.ndarray:
|
|
|
371
590
|
Returns
|
|
372
591
|
-------
|
|
373
592
|
numpy.ndarray
|
|
374
|
-
NumPy array representation of the decoded image.
|
|
593
|
+
NumPy array representation of the decoded image in RGB format (for color images).
|
|
594
|
+
Grayscale images are returned as-is.
|
|
375
595
|
|
|
376
596
|
Raises
|
|
377
597
|
------
|
|
378
598
|
ValueError
|
|
379
599
|
If the base64 string is invalid or cannot be decoded into an image.
|
|
380
|
-
ImportError
|
|
381
|
-
If required libraries are not installed.
|
|
382
600
|
|
|
383
601
|
Examples
|
|
384
602
|
--------
|
|
385
603
|
>>> base64_str = '/9j/4AAQSkZJRgABAQAAAQABAAD/2wBD...'
|
|
386
604
|
>>> img_array = base64_to_numpy(base64_str)
|
|
605
|
+
>>> # img_array is now in RGB format (for color images)
|
|
387
606
|
"""
|
|
388
607
|
try:
|
|
389
|
-
# Decode the base64 string
|
|
390
|
-
|
|
391
|
-
except
|
|
608
|
+
# Decode the base64 string to bytes using bytetools
|
|
609
|
+
image_bytes = bytetools.bytesfrombase64(base64_string)
|
|
610
|
+
except Exception as e:
|
|
392
611
|
raise ValueError("Invalid base64 string") from e
|
|
393
612
|
|
|
613
|
+
# Create numpy buffer from bytes and decode using OpenCV
|
|
614
|
+
buf = np.frombuffer(image_bytes, dtype=np.uint8)
|
|
394
615
|
try:
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
image
|
|
401
|
-
|
|
616
|
+
img = cv2.imdecode(buf, cv2.IMREAD_UNCHANGED)
|
|
617
|
+
if img is None:
|
|
618
|
+
raise ValueError("OpenCV failed to decode image")
|
|
619
|
+
|
|
620
|
+
# Convert BGR to RGB for consistent processing (OpenCV loads as BGR)
|
|
621
|
+
# Only convert if it's a 3-channel color image
|
|
622
|
+
if img.ndim == 3 and img.shape[2] == 3:
|
|
623
|
+
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
|
624
|
+
except ImportError:
|
|
625
|
+
raise
|
|
626
|
+
except Exception as e:
|
|
402
627
|
raise ValueError("Unable to decode image from base64 string") from e
|
|
403
628
|
|
|
404
|
-
# Convert
|
|
405
|
-
|
|
629
|
+
# Convert to numpy array
|
|
630
|
+
img = np.array(img)
|
|
631
|
+
# Assert that 3-channel images are in RGB format after conversion
|
|
632
|
+
assert img.ndim <= 3, f"Image has unexpected number of dimensions: {img.ndim}"
|
|
633
|
+
assert img.ndim != 3 or img.shape[2] == 3, f"3-channel image should have 3 channels, got: {img.shape[2]}"
|
|
634
|
+
|
|
635
|
+
return img
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
def scale_numpy_image(
|
|
639
|
+
img_arr: np.ndarray, scale_tuple: Optional[Tuple[int, int]] = None, interpolation=Image.LANCZOS
|
|
640
|
+
) -> np.ndarray:
|
|
641
|
+
"""
|
|
642
|
+
Scales a NumPy image array using OpenCV with aspect ratio preservation.
|
|
643
|
+
|
|
644
|
+
This function provides OpenCV-based image scaling that mimics PIL's thumbnail behavior
|
|
645
|
+
by maintaining aspect ratio and scaling to fit within the specified dimensions.
|
|
646
|
+
|
|
647
|
+
Parameters
|
|
648
|
+
----------
|
|
649
|
+
img_arr : np.ndarray
|
|
650
|
+
The input image as a NumPy array.
|
|
651
|
+
scale_tuple : Optional[Tuple[int, int]], optional
|
|
652
|
+
A tuple (width, height) to resize the image to. If provided, the image
|
|
653
|
+
will be resized to fit within these dimensions while maintaining aspect ratio
|
|
654
|
+
(similar to PIL's thumbnail method). Defaults to None.
|
|
655
|
+
interpolation : int, optional
|
|
656
|
+
OpenCV interpolation method. Defaults to cv2.INTER_LANCZOS4.
|
|
406
657
|
|
|
407
|
-
|
|
658
|
+
Returns
|
|
659
|
+
-------
|
|
660
|
+
np.ndarray
|
|
661
|
+
A NumPy array representing the scaled image data.
|
|
662
|
+
"""
|
|
663
|
+
# Apply scaling using OpenCV if specified
|
|
664
|
+
# Using PIL for scaling as CV2 seems to lead to different results
|
|
665
|
+
# TODO: Remove when we move to YOLOX Ensemble Models
|
|
666
|
+
if scale_tuple:
|
|
667
|
+
image = Image.fromarray(img_arr)
|
|
668
|
+
image.thumbnail(scale_tuple, interpolation)
|
|
669
|
+
img_arr = np.array(image)
|
|
670
|
+
# Ensure we return a copy
|
|
671
|
+
return img_arr.copy()
|
|
@@ -250,7 +250,7 @@ class SimpleMessageBrokerHandler(socketserver.BaseRequestHandler):
|
|
|
250
250
|
with queue_lock:
|
|
251
251
|
if queue.empty():
|
|
252
252
|
# Return failure response immediately
|
|
253
|
-
response = ResponseSchema(response_code=
|
|
253
|
+
response = ResponseSchema(response_code=2, response_reason="Job not ready")
|
|
254
254
|
self._send_response(response)
|
|
255
255
|
return
|
|
256
256
|
# Pop the message from the queue
|