nv-ingest-api 2025.4.15.dev20250415__py3-none-any.whl → 2025.4.17.dev20250417__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. Click here for more details.
- nv_ingest_api/__init__.py +3 -0
- nv_ingest_api/interface/__init__.py +215 -0
- nv_ingest_api/interface/extract.py +972 -0
- nv_ingest_api/interface/mutate.py +154 -0
- nv_ingest_api/interface/store.py +218 -0
- nv_ingest_api/interface/transform.py +382 -0
- nv_ingest_api/interface/utility.py +200 -0
- nv_ingest_api/internal/enums/__init__.py +3 -0
- nv_ingest_api/internal/enums/common.py +494 -0
- nv_ingest_api/internal/extract/__init__.py +3 -0
- nv_ingest_api/internal/extract/audio/__init__.py +3 -0
- nv_ingest_api/internal/extract/audio/audio_extraction.py +149 -0
- nv_ingest_api/internal/extract/docx/__init__.py +5 -0
- nv_ingest_api/internal/extract/docx/docx_extractor.py +205 -0
- nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
- nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +3 -0
- nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +122 -0
- nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +895 -0
- nv_ingest_api/internal/extract/image/__init__.py +3 -0
- nv_ingest_api/internal/extract/image/chart_extractor.py +353 -0
- nv_ingest_api/internal/extract/image/image_extractor.py +204 -0
- nv_ingest_api/internal/extract/image/image_helpers/__init__.py +3 -0
- nv_ingest_api/internal/extract/image/image_helpers/common.py +403 -0
- nv_ingest_api/internal/extract/image/infographic_extractor.py +253 -0
- nv_ingest_api/internal/extract/image/table_extractor.py +344 -0
- nv_ingest_api/internal/extract/pdf/__init__.py +3 -0
- nv_ingest_api/internal/extract/pdf/engines/__init__.py +19 -0
- nv_ingest_api/internal/extract/pdf/engines/adobe.py +484 -0
- nv_ingest_api/internal/extract/pdf/engines/llama.py +243 -0
- nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +597 -0
- nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +146 -0
- nv_ingest_api/internal/extract/pdf/engines/pdfium.py +603 -0
- nv_ingest_api/internal/extract/pdf/engines/tika.py +96 -0
- nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +426 -0
- nv_ingest_api/internal/extract/pdf/pdf_extractor.py +74 -0
- nv_ingest_api/internal/extract/pptx/__init__.py +5 -0
- nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
- nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +799 -0
- nv_ingest_api/internal/extract/pptx/pptx_extractor.py +187 -0
- nv_ingest_api/internal/mutate/__init__.py +3 -0
- nv_ingest_api/internal/mutate/deduplicate.py +110 -0
- nv_ingest_api/internal/mutate/filter.py +133 -0
- nv_ingest_api/internal/primitives/__init__.py +0 -0
- nv_ingest_api/{primitives → internal/primitives}/control_message_task.py +4 -0
- nv_ingest_api/{primitives → internal/primitives}/ingest_control_message.py +5 -2
- nv_ingest_api/internal/primitives/nim/__init__.py +8 -0
- nv_ingest_api/internal/primitives/nim/default_values.py +15 -0
- nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +3 -0
- nv_ingest_api/internal/primitives/nim/model_interface/cached.py +274 -0
- nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +56 -0
- nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +270 -0
- nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +275 -0
- nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +238 -0
- nv_ingest_api/internal/primitives/nim/model_interface/paddle.py +462 -0
- nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +367 -0
- nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +132 -0
- nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +152 -0
- nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +1400 -0
- nv_ingest_api/internal/primitives/nim/nim_client.py +344 -0
- nv_ingest_api/internal/primitives/nim/nim_model_interface.py +81 -0
- nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
- nv_ingest_api/internal/primitives/tracing/latency.py +69 -0
- nv_ingest_api/internal/primitives/tracing/logging.py +96 -0
- nv_ingest_api/internal/primitives/tracing/tagging.py +197 -0
- nv_ingest_api/internal/schemas/__init__.py +3 -0
- nv_ingest_api/internal/schemas/extract/__init__.py +3 -0
- nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +130 -0
- nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +135 -0
- nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +124 -0
- nv_ingest_api/internal/schemas/extract/extract_image_schema.py +124 -0
- nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +128 -0
- nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +218 -0
- nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +124 -0
- nv_ingest_api/internal/schemas/extract/extract_table_schema.py +129 -0
- nv_ingest_api/internal/schemas/message_brokers/__init__.py +3 -0
- nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +23 -0
- nv_ingest_api/internal/schemas/message_brokers/request_schema.py +34 -0
- nv_ingest_api/internal/schemas/message_brokers/response_schema.py +19 -0
- nv_ingest_api/internal/schemas/meta/__init__.py +3 -0
- nv_ingest_api/internal/schemas/meta/base_model_noext.py +11 -0
- nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +237 -0
- nv_ingest_api/internal/schemas/meta/metadata_schema.py +221 -0
- nv_ingest_api/internal/schemas/mutate/__init__.py +3 -0
- nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +16 -0
- nv_ingest_api/internal/schemas/store/__init__.py +3 -0
- nv_ingest_api/internal/schemas/store/store_embedding_schema.py +28 -0
- nv_ingest_api/internal/schemas/store/store_image_schema.py +30 -0
- nv_ingest_api/internal/schemas/transform/__init__.py +3 -0
- nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +15 -0
- nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +17 -0
- nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +25 -0
- nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +22 -0
- nv_ingest_api/internal/store/__init__.py +3 -0
- nv_ingest_api/internal/store/embed_text_upload.py +236 -0
- nv_ingest_api/internal/store/image_upload.py +232 -0
- nv_ingest_api/internal/transform/__init__.py +3 -0
- nv_ingest_api/internal/transform/caption_image.py +205 -0
- nv_ingest_api/internal/transform/embed_text.py +496 -0
- nv_ingest_api/internal/transform/split_text.py +157 -0
- nv_ingest_api/util/__init__.py +0 -0
- nv_ingest_api/util/control_message/__init__.py +0 -0
- nv_ingest_api/util/control_message/validators.py +47 -0
- nv_ingest_api/util/converters/__init__.py +0 -0
- nv_ingest_api/util/converters/bytetools.py +78 -0
- nv_ingest_api/util/converters/containers.py +65 -0
- nv_ingest_api/util/converters/datetools.py +90 -0
- nv_ingest_api/util/converters/dftools.py +127 -0
- nv_ingest_api/util/converters/formats.py +64 -0
- nv_ingest_api/util/converters/type_mappings.py +27 -0
- nv_ingest_api/util/detectors/__init__.py +5 -0
- nv_ingest_api/util/detectors/language.py +38 -0
- nv_ingest_api/util/exception_handlers/__init__.py +0 -0
- nv_ingest_api/util/exception_handlers/converters.py +72 -0
- nv_ingest_api/util/exception_handlers/decorators.py +223 -0
- nv_ingest_api/util/exception_handlers/detectors.py +74 -0
- nv_ingest_api/util/exception_handlers/pdf.py +116 -0
- nv_ingest_api/util/exception_handlers/schemas.py +68 -0
- nv_ingest_api/util/image_processing/__init__.py +5 -0
- nv_ingest_api/util/image_processing/clustering.py +260 -0
- nv_ingest_api/util/image_processing/processing.py +179 -0
- nv_ingest_api/util/image_processing/table_and_chart.py +449 -0
- nv_ingest_api/util/image_processing/transforms.py +407 -0
- nv_ingest_api/util/logging/__init__.py +0 -0
- nv_ingest_api/util/logging/configuration.py +31 -0
- nv_ingest_api/util/message_brokers/__init__.py +3 -0
- nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +9 -0
- nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +465 -0
- nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +71 -0
- nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +435 -0
- nv_ingest_api/util/metadata/__init__.py +5 -0
- nv_ingest_api/util/metadata/aggregators.py +469 -0
- nv_ingest_api/util/multi_processing/__init__.py +8 -0
- nv_ingest_api/util/multi_processing/mp_pool_singleton.py +194 -0
- nv_ingest_api/util/nim/__init__.py +56 -0
- nv_ingest_api/util/pdf/__init__.py +3 -0
- nv_ingest_api/util/pdf/pdfium.py +427 -0
- nv_ingest_api/util/schema/__init__.py +0 -0
- nv_ingest_api/util/schema/schema_validator.py +10 -0
- nv_ingest_api/util/service_clients/__init__.py +3 -0
- nv_ingest_api/util/service_clients/client_base.py +72 -0
- nv_ingest_api/util/service_clients/kafka/__init__.py +3 -0
- nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
- nv_ingest_api/util/service_clients/redis/redis_client.py +334 -0
- nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
- nv_ingest_api/util/service_clients/rest/rest_client.py +398 -0
- nv_ingest_api/util/string_processing/__init__.py +51 -0
- {nv_ingest_api-2025.4.15.dev20250415.dist-info → nv_ingest_api-2025.4.17.dev20250417.dist-info}/METADATA +1 -1
- nv_ingest_api-2025.4.17.dev20250417.dist-info/RECORD +152 -0
- nv_ingest_api-2025.4.15.dev20250415.dist-info/RECORD +0 -9
- /nv_ingest_api/{primitives → internal}/__init__.py +0 -0
- {nv_ingest_api-2025.4.15.dev20250415.dist-info → nv_ingest_api-2025.4.17.dev20250417.dist-info}/WHEEL +0 -0
- {nv_ingest_api-2025.4.15.dev20250415.dist-info → nv_ingest_api-2025.4.17.dev20250417.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-2025.4.15.dev20250415.dist-info → nv_ingest_api-2025.4.17.dev20250417.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
import base64
|
|
6
|
+
import io
|
|
7
|
+
import logging
|
|
8
|
+
from io import BytesIO
|
|
9
|
+
from math import ceil
|
|
10
|
+
from math import floor
|
|
11
|
+
from typing import Optional
|
|
12
|
+
from typing import Tuple
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
from PIL import Image
|
|
16
|
+
from PIL import UnidentifiedImageError
|
|
17
|
+
|
|
18
|
+
from nv_ingest_api.util.converters import bytetools
|
|
19
|
+
|
|
20
|
+
# Default target width used by pad_image() when the caller passes no target_width.
DEFAULT_MAX_WIDTH = 1024
# Default target height used by pad_image() when the caller passes no target_height.
DEFAULT_MAX_HEIGHT = 1280

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def scale_image_to_encoding_size(
    base64_image: str, max_base64_size: int = 180_000, initial_reduction: float = 0.9
) -> Tuple[str, Tuple[int, int]]:
    """
    Shrink a base64-encoded image until its base64 text fits a size budget.

    The image is decoded and converted to RGB. If the incoming base64 string is
    already within ``max_base64_size`` it is returned unchanged (in whatever
    format it arrived in). Otherwise the decoded image is repeatedly resized
    from the original pixels with a progressively smaller scale factor and
    re-encoded as PNG until the encoded text fits.

    Parameters
    ----------
    base64_image : str
        Base64-encoded image string.
    max_base64_size : int, optional
        Maximum allowed length of the base64 string, by default 180,000 characters.
    initial_reduction : float, optional
        Scale factor applied on the first resize attempt, by default 0.9. Each
        failed attempt multiplies the factor by 0.95.

    Returns
    -------
    Tuple[str, Tuple[int, int]]
        A tuple containing:
        - The base64 string: the input itself when no resize was needed,
          otherwise a base64-encoded PNG of the resized image.
        - The size of the returned image as (width, height).

    Raises
    ------
    ValueError
        If the image would have to be scaled below one pixel on either side
        before fitting within ``max_base64_size``.
    Exception
        Any error raised while decoding, resizing or encoding is logged and
        re-raised unchanged.
    """
    try:
        # Decode first: the original dimensions are needed even on the fast path.
        image_data = base64.b64decode(base64_image)
        img = Image.open(io.BytesIO(image_data)).convert("RGB")
        original_size = img.size

        # Fast path: already small enough, hand the input back untouched.
        if len(base64_image) <= max_base64_size:
            return base64_image, original_size

        width, height = original_size
        reduction_step = initial_reduction
        new_size = original_size
        while len(base64_image) > max_base64_size:
            new_size = (int(width * reduction_step), int(height * reduction_step))

            # Guard before resizing so the caller gets this explicit message
            # rather than whatever the imaging library reports for an empty size.
            if new_size[0] < 1 or new_size[1] < 1:
                raise ValueError("Image cannot be resized further without becoming too small.")

            # Always resize from the original pixels to avoid compounding resampling loss.
            img_resized = img.resize(new_size, Image.LANCZOS)
            buffered = io.BytesIO()
            img_resized.save(buffered, format="PNG")
            base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")

            # Still too large: tighten the scale factor for the next attempt.
            if len(base64_image) > max_base64_size:
                reduction_step *= 0.95

        return base64_image, new_size

    except Exception as e:
        logger.error(f"Error resizing the image: {e}")
        raise
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def ensure_base64_is_png(base64_image: str) -> Optional[str]:
    """
    Ensure the given base64-encoded image is in PNG format, converting if necessary.

    Parameters
    ----------
    base64_image : str
        Base64-encoded image string.

    Returns
    -------
    Optional[str]
        The input string unchanged when the decoded image already reports the
        PNG format; otherwise a base64-encoded PNG produced from the image
        converted to RGB. Returns None if decoding or conversion fails; the
        error is logged rather than raised.
    """
    try:
        # Decode the base64 string and load the image
        image_data = base64.b64decode(base64_image)
        image = Image.open(io.BytesIO(image_data))

        # Only re-encode when the source is not already PNG
        if image.format != "PNG":
            buffered = io.BytesIO()
            # The image is converted to RGB mode before being written as PNG.
            image.convert("RGB").save(buffered, format="PNG")
            base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")

        return base64_image
    except Exception as e:
        # Best-effort helper: callers must check for None.
        logger.error(f"Error ensuring PNG format: {e}")
        return None
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def pad_image(
    array: np.ndarray,
    target_width: int = DEFAULT_MAX_WIDTH,
    target_height: int = DEFAULT_MAX_HEIGHT,
    background_color: int = 255,
    dtype=np.uint8,
) -> Tuple[np.ndarray, Tuple[int, int]]:
    """
    Pads a NumPy array representing an image to the specified target dimensions.

    The image is centered on a canvas filled with ``background_color``. In any
    dimension where the image is already at least as large as the target, no
    padding is applied and the image keeps its original size in that dimension.

    Parameters
    ----------
    array : np.ndarray
        The input image, either (H, W, C) or a 2-D (H, W) grayscale array.
    target_width : int, optional
        The desired width of the padded image. Defaults to DEFAULT_MAX_WIDTH.
    target_height : int, optional
        The desired height of the padded image. Defaults to DEFAULT_MAX_HEIGHT.
    background_color : int, optional
        Value used to fill the canvas around the image. Defaults to 255.
    dtype : optional
        dtype used to create the canvas. Defaults to np.uint8.

    Returns
    -------
    padded_array : np.ndarray
        The padded image, of shape
        (max(H, target_height), max(W, target_width)) followed by any trailing
        axes of the input.
    padding_offsets : Tuple[int, int]
        The horizontal and vertical offsets (pad_width, pad_height) at which
        the original image was placed on the canvas.

    Examples
    --------
    >>> image = np.random.randint(0, 255, (600, 800, 3), dtype=np.uint8)
    >>> padded_image, offsets = pad_image(image, target_width=1000, target_height=1000)
    >>> padded_image.shape
    (1000, 1000, 3)
    >>> offsets
    (100, 200)
    """
    height, width = array.shape[:2]

    # Offsets that center the image; clamped to 0 when the target is smaller.
    pad_height = max((target_height - height) // 2, 0)
    pad_width = max((target_width - width) // 2, 0)

    # The canvas never shrinks below the image itself.
    final_height = max(height, target_height)
    final_width = max(width, target_width)

    # Carry over any trailing axes (e.g. channels) so that 2-D grayscale input
    # works as well as (H, W, C) input.
    canvas_shape = (final_height, final_width) + tuple(array.shape[2:])
    canvas = background_color * np.ones(canvas_shape, dtype=dtype)
    canvas[pad_height : pad_height + height, pad_width : pad_width + width] = array  # noqa: E203

    return canvas, (pad_width, pad_height)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def check_numpy_image_size(image: np.ndarray, min_height: int, min_width: int) -> bool:
    """
    Report whether an image array meets both minimum height and minimum width.

    Parameters:
        image (np.ndarray): The image array; its first two axes are read as (height, width).
        min_height (int): The minimum height required.
        min_width (int): The minimum width required.

    Returns:
        bool: True if height >= min_height and width >= min_width, False otherwise.

    Raises:
        ValueError: If the array has fewer than two dimensions.
    """
    # Anything below rank 2 has no (height, width) to compare.
    if image.ndim < 2:
        raise ValueError("The input array does not have sufficient dimensions for an image.")

    rows, cols = image.shape[0], image.shape[1]
    tall_enough = rows >= min_height
    return tall_enough and cols >= min_width
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def crop_image(
    array: np.ndarray, bbox: Tuple[int, int, int, int], min_width: int = 1, min_height: int = 1
) -> Optional[np.ndarray]:
    """
    Crops a NumPy array representing an image according to the specified bounding box.

    Bounding-box coordinates are expanded outward to whole pixels (floor for
    the top-left corner, ceil for the bottom-right corner) and then clamped to
    the image bounds before cropping.

    Parameters
    ----------
    array : np.ndarray
        The image as a NumPy array whose first two axes are (height, width).
    bbox : Tuple[int, int, int, int]
        The bounding box to crop the image to, given as (w1, h1, w2, h2).
        Fractional values are accepted and rounded as described above.
    min_width : int, optional
        The minimum allowable width for the cropped image. If the cropped width is smaller than this value,
        the function returns None. Default is 1.
    min_height : int, optional
        The minimum allowable height for the cropped image. If the cropped height is smaller than this value,
        the function returns None. Default is 1.

    Returns
    -------
    Optional[np.ndarray]
        The cropped region, obtained by slicing ``array`` (no explicit copy is
        made), or None if the clamped box is narrower than ``min_width`` or
        shorter than ``min_height``.
    """
    w1, h1, w2, h2 = bbox

    # Round outward to whole pixels, then clamp to the image extent.
    h1 = max(floor(h1), 0)
    h2 = min(ceil(h2), array.shape[0])
    w1 = max(floor(w1), 0)
    w2 = min(ceil(w2), array.shape[1])

    # Reject degenerate or inverted boxes (negative extents also land here).
    if (w2 - w1 < min_width) or (h2 - h1 < min_height):
        return None

    return array[h1:h2, w1:w2]
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def normalize_image(
    array: np.ndarray,
    r_mean: float = 0.485,
    g_mean: float = 0.456,
    b_mean: float = 0.406,
    r_std: float = 0.229,
    g_std: float = 0.224,
    b_std: float = 0.225,
) -> np.ndarray:
    """
    Normalizes an RGB image by applying a mean and standard deviation to each channel.

    Parameters
    ----------
    array : np.ndarray
        The input image array, which can be either grayscale or RGB. The image should have a shape of
        (height, width, 3) for RGB images, or (height, width) or (height, width, 1) for grayscale images.
        If a grayscale image is provided, it will be converted to RGB format by repeating the grayscale values
        across all three channels (R, G, B).
    r_mean : float, optional
        The mean to be subtracted from the red channel (default is 0.485).
    g_mean : float, optional
        The mean to be subtracted from the green channel (default is 0.456).
    b_mean : float, optional
        The mean to be subtracted from the blue channel (default is 0.406).
    r_std : float, optional
        The standard deviation to divide the red channel by (default is 0.229).
    g_std : float, optional
        The standard deviation to divide the green channel by (default is 0.224).
    b_std : float, optional
        The standard deviation to divide the blue channel by (default is 0.225).

    Returns
    -------
    np.ndarray
        A float32 array of shape (height, width, 3) where each channel has been scaled to [0, 1] and then
        normalized by the given mean and standard deviation. Grayscale input is therefore returned with
        three channels.

    Notes
    -----
    The input pixel values should be in the range [0, 255], and the function scales these values to [0, 1]
    before applying normalization.
    """
    # Grayscale input, (H, W) or (H, W, 1): replicate the single plane into
    # R, G and B. Stacking constant 255 planes here instead would tint every
    # grayscale image and contradict the documented contract.
    if array.ndim == 2 or array.shape[2] == 1:
        array = np.dstack((array, array, array))

    # Per-channel statistics shaped to broadcast over (H, W, 3).
    mean = np.array([r_mean, g_mean, b_mean], dtype=np.float32).reshape((1, 1, 3))
    std = np.array([r_std, g_std, b_std], dtype=np.float32).reshape((1, 1, 3))

    return (array.astype("float32") / 255.0 - mean) / std
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def numpy_to_base64(array: np.ndarray) -> str:
    """
    Converts a NumPy array representing an image to a base64-encoded string.

    The array is cast to uint8, turned into a PIL image, written out as PNG
    and the resulting bytes are base64-encoded. No range check is performed
    before the uint8 cast.

    Parameters
    ----------
    array : np.ndarray
        The input image as a NumPy array, e.g. (H, W, C) or (H, W). An
        (H, W, 1) array is treated as grayscale and squeezed to (H, W).

    Returns
    -------
    str
        The base64-encoded string representation of the input NumPy array as a PNG image.

    Raises
    ------
    ValueError
        If the input array cannot be converted into a valid image format.
    RuntimeError
        If there is an issue during the image conversion or base64 encoding process.

    Examples
    --------
    >>> array = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
    >>> encoded_str = numpy_to_base64(array)
    >>> isinstance(encoded_str, str)
    True
    """
    # PIL.Image.fromarray() expects (h, w) for grayscale, so drop the
    # redundant trailing axis of an (h, w, 1) array.
    if array.ndim == 3 and array.shape[2] == 1:
        array = np.squeeze(array, axis=2)

    try:
        pil_image = Image.fromarray(array.astype(np.uint8))
    except Exception as e:
        # Chain the cause explicitly, as base64_to_numpy() does.
        raise ValueError(f"Failed to convert NumPy array to image: {e}") from e

    try:
        with BytesIO() as buffer:
            pil_image.save(buffer, format="PNG")
            base64_img = bytetools.base64frombytes(buffer.getvalue())
    except Exception as e:
        raise RuntimeError(f"Failed to encode image to base64: {e}") from e

    return base64_img
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def base64_to_numpy(base64_string: str) -> np.ndarray:
    """
    Convert a base64-encoded image string to a NumPy array.

    Parameters
    ----------
    base64_string : str
        Base64-encoded string representing an image.

    Returns
    -------
    numpy.ndarray
        NumPy array representation of the decoded image.

    Raises
    ------
    ValueError
        If the base64 string is invalid, or if the decoded bytes are not
        recognized as an image.

    Examples
    --------
    >>> base64_str = '/9j/4AAQSkZJRgABAQAAAQABAAD/2wBD...'
    >>> img_array = base64_to_numpy(base64_str)
    """
    try:
        image_data = base64.b64decode(base64_string)
    except ValueError as e:
        # binascii.Error, raised for malformed base64, is a ValueError
        # subclass, so this single clause covers it without reaching into
        # base64's private import of binascii.
        raise ValueError("Invalid base64 string") from e

    try:
        image = Image.open(BytesIO(image_data))
        # Force the pixel data to be read now rather than lazily.
        image.load()
    except UnidentifiedImageError as e:
        # NOTE(review): other errors raised while loading (e.g. for damaged
        # image data) are not converted and propagate as-is - confirm whether
        # callers expect ValueError for those too.
        raise ValueError("Unable to decode image from base64 string") from e

    return np.array(image)
|
|
File without changes
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
import sys
|
|
8
|
+
from enum import Enum
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class LogLevel(str, Enum):
    """String-valued enumeration of the five standard logging severity names."""

    # Each member's value equals its name, so members compare equal to the
    # plain level-name strings.
    DEBUG = "DEBUG"
    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"
    CRITICAL = "CRITICAL"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def configure_logging(logger, level_name):
    """
    Configure root logging to write to stdout and set the level of ``logger``.

    Parameters:
    - logger (logging.Logger): The logger whose level should be set.
    - level_name (str): The name of the logging level (e.g., "DEBUG", "INFO").

    Raises:
    - ValueError: If ``level_name`` does not name an integer logging level.

    Notes:
    ``logging.basicConfig`` does nothing when the root logger already has
    handlers, so on repeated calls only ``logger``'s own level changes.
    """
    numeric_level = getattr(logging, level_name, None)
    # bool is an int subclass; exclude it so flag attributes of the logging
    # module (e.g. "raiseExceptions") are not mistaken for levels.
    if isinstance(numeric_level, bool) or not isinstance(numeric_level, int):
        raise ValueError(f"Invalid log level: {level_name}")

    # Hand the stdout stream to basicConfig itself; constructing a
    # StreamHandler without attaching it to anything has no effect.
    logging.basicConfig(
        level=numeric_level,
        format="%(asctime)s - %(levelname)s - %(message)s",
        stream=sys.stdout,
    )
    logger.setLevel(numeric_level)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
from .broker import SimpleMessageBroker
|
|
6
|
+
from .broker import ResponseSchema
|
|
7
|
+
from .simple_client import SimpleClient
|
|
8
|
+
|
|
9
|
+
__all__ = ["SimpleMessageBroker", "SimpleClient", "ResponseSchema"]
|