nv-ingest-api 2025.10.4.dev20251004__py3-none-any.whl → 2025.11.2.dev20251102__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. Click here for more details.
- nv_ingest_api/internal/extract/image/chart_extractor.py +7 -3
- nv_ingest_api/internal/extract/image/infographic_extractor.py +7 -3
- nv_ingest_api/internal/extract/image/table_extractor.py +7 -3
- nv_ingest_api/internal/extract/pdf/engines/pdfium.py +6 -4
- nv_ingest_api/internal/primitives/nim/model_interface/ocr.py +11 -4
- nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +4 -0
- nv_ingest_api/internal/primitives/nim/nim_client.py +158 -15
- nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +4 -2
- nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +10 -1
- nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +4 -2
- nv_ingest_api/internal/schemas/extract/extract_image_schema.py +4 -2
- nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +10 -1
- nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +6 -4
- nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +4 -2
- nv_ingest_api/internal/schemas/extract/extract_table_schema.py +9 -1
- nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +56 -1
- nv_ingest_api/internal/schemas/meta/metadata_schema.py +9 -0
- nv_ingest_api/internal/schemas/mixins.py +39 -0
- nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +4 -0
- nv_ingest_api/internal/transform/embed_text.py +82 -0
- nv_ingest_api/util/dataloader/dataloader.py +20 -9
- nv_ingest_api/util/image_processing/transforms.py +67 -1
- nv_ingest_api/util/message_brokers/qos_scheduler.py +283 -0
- nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +1 -0
- nv_ingest_api/util/multi_processing/mp_pool_singleton.py +8 -2
- nv_ingest_api/util/service_clients/redis/redis_client.py +160 -0
- nv_ingest_api/util/service_clients/rest/rest_client.py +42 -3
- nv_ingest_api/util/string_processing/yaml.py +41 -4
- {nv_ingest_api-2025.10.4.dev20251004.dist-info → nv_ingest_api-2025.11.2.dev20251102.dist-info}/METADATA +2 -1
- {nv_ingest_api-2025.10.4.dev20251004.dist-info → nv_ingest_api-2025.11.2.dev20251102.dist-info}/RECORD +34 -32
- udfs/llm_summarizer_udf.py +132 -137
- {nv_ingest_api-2025.10.4.dev20251004.dist-info → nv_ingest_api-2025.11.2.dev20251102.dist-info}/WHEEL +0 -0
- {nv_ingest_api-2025.10.4.dev20251004.dist-info → nv_ingest_api-2025.11.2.dev20251102.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-2025.10.4.dev20251004.dist-info → nv_ingest_api-2025.11.2.dev20251102.dist-info}/top_level.txt +0 -0
|
@@ -24,8 +24,41 @@ logger = logging.getLogger(__name__)
|
|
|
24
24
|
# Tracing Options Schema
|
|
25
25
|
class TracingOptionsSchema(BaseModelNoExt):
|
|
26
26
|
trace: bool = False
|
|
27
|
-
ts_send: int
|
|
27
|
+
ts_send: Optional[int] = None
|
|
28
28
|
trace_id: Optional[str] = None
|
|
29
|
+
# V2 PDF splitting support
|
|
30
|
+
parent_job_id: Optional[str] = None
|
|
31
|
+
page_num: Optional[int] = None
|
|
32
|
+
total_pages: Optional[int] = None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# PDF Configuration Schema
|
|
36
|
+
class PdfConfigSchema(BaseModelNoExt):
|
|
37
|
+
"""PDF-specific configuration options for job submission.
|
|
38
|
+
|
|
39
|
+
Note: split_page_count accepts any positive integer but will be clamped
|
|
40
|
+
to the [1, 128] range by the server at runtime.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
split_page_count: Annotated[int, Field(ge=1)] = 32
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class RoutingOptionsSchema(BaseModelNoExt):
|
|
47
|
+
# Queue routing hint for QoS scheduler
|
|
48
|
+
queue_hint: Optional[str] = None
|
|
49
|
+
|
|
50
|
+
@field_validator("queue_hint")
|
|
51
|
+
@classmethod
|
|
52
|
+
def validate_queue_hint(cls, v):
|
|
53
|
+
if v is None:
|
|
54
|
+
return v
|
|
55
|
+
if not isinstance(v, str):
|
|
56
|
+
raise ValueError("queue_hint must be a string")
|
|
57
|
+
s = v.lower()
|
|
58
|
+
allowed = {"default", "immediate", "micro", "small", "medium", "large"}
|
|
59
|
+
if s not in allowed:
|
|
60
|
+
raise ValueError("queue_hint must be one of: default, immediate, micro, small, medium, large")
|
|
61
|
+
return s
|
|
29
62
|
|
|
30
63
|
|
|
31
64
|
# Ingest Task Schemas
|
|
@@ -111,6 +144,8 @@ class IngestTaskEmbedSchema(BaseModelNoExt):
|
|
|
111
144
|
image_elements_modality: Optional[str] = None
|
|
112
145
|
structured_elements_modality: Optional[str] = None
|
|
113
146
|
audio_elements_modality: Optional[str] = None
|
|
147
|
+
custom_content_field: Optional[str] = None
|
|
148
|
+
result_target_field: Optional[str] = None
|
|
114
149
|
|
|
115
150
|
|
|
116
151
|
class IngestTaskVdbUploadSchema(BaseModelNoExt):
|
|
@@ -266,6 +301,26 @@ class IngestJobSchema(BaseModelNoExt):
|
|
|
266
301
|
job_id: Union[str, int]
|
|
267
302
|
tasks: List[IngestTaskSchema]
|
|
268
303
|
tracing_options: Optional[TracingOptionsSchema] = None
|
|
304
|
+
routing_options: Optional[RoutingOptionsSchema] = None
|
|
305
|
+
pdf_config: Optional[PdfConfigSchema] = None
|
|
306
|
+
|
|
307
|
+
@model_validator(mode="before")
|
|
308
|
+
@classmethod
|
|
309
|
+
def migrate_queue_hint(cls, values):
|
|
310
|
+
"""
|
|
311
|
+
Backward-compatibility shim: if a legacy client sends
|
|
312
|
+
tracing_options.queue_hint, move it into routing_options.queue_hint.
|
|
313
|
+
"""
|
|
314
|
+
try:
|
|
315
|
+
topt = values.get("tracing_options") or {}
|
|
316
|
+
ropt = values.get("routing_options") or {}
|
|
317
|
+
if isinstance(topt, dict) and "queue_hint" in topt and "queue_hint" not in ropt:
|
|
318
|
+
ropt["queue_hint"] = topt.pop("queue_hint")
|
|
319
|
+
values["routing_options"] = ropt
|
|
320
|
+
values["tracing_options"] = topt
|
|
321
|
+
except Exception:
|
|
322
|
+
pass
|
|
323
|
+
return values
|
|
269
324
|
|
|
270
325
|
|
|
271
326
|
# ------------------------------------------------------------------------------
|
|
@@ -352,6 +352,15 @@ class MetadataSchema(BaseModelNoExt):
|
|
|
352
352
|
raise_on_failure: bool = False
|
|
353
353
|
"""If True, indicates that processing should halt on failure."""
|
|
354
354
|
|
|
355
|
+
total_pages: Optional[int] = None
|
|
356
|
+
"""Total number of pages in the source document (V2 API)."""
|
|
357
|
+
|
|
358
|
+
original_source_id: Optional[str] = None
|
|
359
|
+
"""The original source identifier before any splitting or chunking (V2 API)."""
|
|
360
|
+
|
|
361
|
+
original_source_name: Optional[str] = None
|
|
362
|
+
"""The original source name before any splitting or chunking (V2 API)."""
|
|
363
|
+
|
|
355
364
|
custom_content: Optional[Dict[str, Any]] = None
|
|
356
365
|
|
|
357
366
|
@model_validator(mode="before")
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Shared mixins for Pydantic schemas.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import Any
|
|
10
|
+
from pydantic import BaseModel, field_validator
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class LowercaseProtocolMixin(BaseModel):
|
|
14
|
+
"""
|
|
15
|
+
Mixin that automatically lowercases any field ending with '_infer_protocol'.
|
|
16
|
+
|
|
17
|
+
This ensures case-insensitive handling of protocol values (e.g., "HTTP" -> "http").
|
|
18
|
+
Apply this mixin to any schema that has protocol fields to normalize user input.
|
|
19
|
+
|
|
20
|
+
Examples
|
|
21
|
+
--------
|
|
22
|
+
>>> class MyConfigSchema(LowercaseProtocolMixin):
|
|
23
|
+
... yolox_infer_protocol: str = ""
|
|
24
|
+
... ocr_infer_protocol: str = ""
|
|
25
|
+
>>>
|
|
26
|
+
>>> config = MyConfigSchema(yolox_infer_protocol="GRPC", ocr_infer_protocol="HTTP")
|
|
27
|
+
>>> config.yolox_infer_protocol
|
|
28
|
+
'grpc'
|
|
29
|
+
>>> config.ocr_infer_protocol
|
|
30
|
+
'http'
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
@field_validator("*", mode="before")
|
|
34
|
+
@classmethod
|
|
35
|
+
def _lowercase_protocol_fields(cls, v: Any, info):
|
|
36
|
+
"""Lowercase any field ending with '_infer_protocol'."""
|
|
37
|
+
if info.field_name.endswith("_infer_protocol") and v is not None:
|
|
38
|
+
return str(v).strip().lower()
|
|
39
|
+
return v
|
|
@@ -7,6 +7,8 @@ import logging
|
|
|
7
7
|
|
|
8
8
|
from pydantic import ConfigDict, BaseModel, Field, model_validator, field_validator
|
|
9
9
|
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
10
12
|
from nv_ingest_api.util.logging.configuration import LogLevel
|
|
11
13
|
|
|
12
14
|
logger = logging.getLogger(__name__)
|
|
@@ -26,6 +28,8 @@ class TextEmbeddingSchema(BaseModel):
|
|
|
26
28
|
image_elements_modality: str = Field(default="text")
|
|
27
29
|
structured_elements_modality: str = Field(default="text")
|
|
28
30
|
audio_elements_modality: str = Field(default="text")
|
|
31
|
+
custom_content_field: Optional[str] = None
|
|
32
|
+
result_target_field: Optional[str] = None
|
|
29
33
|
|
|
30
34
|
model_config = ConfigDict(extra="forbid")
|
|
31
35
|
|
|
@@ -7,6 +7,7 @@ from concurrent.futures import ThreadPoolExecutor
|
|
|
7
7
|
from functools import partial
|
|
8
8
|
from typing import Any, Dict, Tuple, Optional, Iterable, List
|
|
9
9
|
|
|
10
|
+
import glom
|
|
10
11
|
import pandas as pd
|
|
11
12
|
from openai import OpenAI
|
|
12
13
|
|
|
@@ -282,6 +283,33 @@ def _add_embeddings(row, embeddings, info_msgs):
|
|
|
282
283
|
return row
|
|
283
284
|
|
|
284
285
|
|
|
286
|
+
def _add_custom_embeddings(row, embeddings, result_target_field):
|
|
287
|
+
"""
|
|
288
|
+
Updates a DataFrame row's metadata with the embedding computed for it,
|
|
289
|
+
based on a user supplied custom content field.
|
|
290
|
+
|
|
291
|
+
Parameters
|
|
292
|
+
----------
|
|
293
|
+
row : pandas.Series
|
|
294
|
+
A row of the DataFrame.
|
|
295
|
+
embeddings : dict
|
|
296
|
+
Dictionary mapping row indices to embeddings.
|
|
297
|
+
result_target_field: str
|
|
298
|
+
The field in custom_content to output the embeddings to
|
|
299
|
+
|
|
300
|
+
Returns
|
|
301
|
+
-------
|
|
302
|
+
pandas.Series
|
|
303
|
+
The updated row
|
|
304
|
+
"""
|
|
305
|
+
embedding = embeddings.get(row.name, None)
|
|
306
|
+
|
|
307
|
+
if embedding is not None:
|
|
308
|
+
row["metadata"] = glom.assign(row["metadata"], "custom_content." + result_target_field, embedding, missing=dict)
|
|
309
|
+
|
|
310
|
+
return row
|
|
311
|
+
|
|
312
|
+
|
|
285
313
|
def _format_image_input_string(image_b64: Optional[str]) -> str:
|
|
286
314
|
if not image_b64:
|
|
287
315
|
return
|
|
@@ -381,6 +409,20 @@ def _get_pandas_audio_content(row, modality="text"):
|
|
|
381
409
|
return row.get("audio_metadata", {}).get("audio_transcript")
|
|
382
410
|
|
|
383
411
|
|
|
412
|
+
def _get_pandas_custom_content(row, custom_content_field):
|
|
413
|
+
custom_content = row.get("custom_content", {})
|
|
414
|
+
content = glom.glom(custom_content, custom_content_field, default=None)
|
|
415
|
+
if content is None:
|
|
416
|
+
logger.warning(f"Custom content field: {custom_content_field} not found")
|
|
417
|
+
return None
|
|
418
|
+
|
|
419
|
+
try:
|
|
420
|
+
return str(content)
|
|
421
|
+
except (TypeError, ValueError):
|
|
422
|
+
logger.warning(f"Cannot convert custom content field: {custom_content_field} to string")
|
|
423
|
+
return None
|
|
424
|
+
|
|
425
|
+
|
|
384
426
|
# ------------------------------------------------------------------------------
|
|
385
427
|
# Batch Processing Utilities
|
|
386
428
|
# ------------------------------------------------------------------------------
|
|
@@ -519,6 +561,7 @@ def transform_create_text_embeddings_internal(
|
|
|
519
561
|
api_key = task_config.get("api_key") or transform_config.api_key
|
|
520
562
|
endpoint_url = task_config.get("endpoint_url") or transform_config.embedding_nim_endpoint
|
|
521
563
|
model_name = task_config.get("model_name") or transform_config.embedding_model
|
|
564
|
+
custom_content_field = task_config.get("custom_content_field") or transform_config.custom_content_field
|
|
522
565
|
|
|
523
566
|
if execution_trace_log is None:
|
|
524
567
|
execution_trace_log = {}
|
|
@@ -612,4 +655,43 @@ def transform_create_text_embeddings_internal(
|
|
|
612
655
|
content_masks.append(content_mask)
|
|
613
656
|
|
|
614
657
|
combined_df = _concatenate_extractions_pandas(df_transform_ledger, embedding_dataframes, content_masks)
|
|
658
|
+
|
|
659
|
+
# Embed custom content
|
|
660
|
+
if custom_content_field is not None:
|
|
661
|
+
result_target_field = task_config.get("result_target_field") or custom_content_field + "_embedding"
|
|
662
|
+
|
|
663
|
+
extracted_custom_content = (
|
|
664
|
+
combined_df["metadata"]
|
|
665
|
+
.apply(partial(_get_pandas_custom_content, custom_content_field=custom_content_field))
|
|
666
|
+
.apply(lambda x: x.strip() if isinstance(x, str) and x.strip() else None)
|
|
667
|
+
)
|
|
668
|
+
|
|
669
|
+
valid_custom_content_mask = extracted_custom_content.notna()
|
|
670
|
+
if valid_custom_content_mask.any():
|
|
671
|
+
custom_content_list = extracted_custom_content[valid_custom_content_mask].to_list()
|
|
672
|
+
custom_content_batches = _generate_batches(custom_content_list, batch_size=transform_config.batch_size)
|
|
673
|
+
|
|
674
|
+
custom_content_embeddings = _async_runner(
|
|
675
|
+
custom_content_batches,
|
|
676
|
+
api_key,
|
|
677
|
+
endpoint_url,
|
|
678
|
+
model_name,
|
|
679
|
+
transform_config.encoding_format,
|
|
680
|
+
transform_config.input_type,
|
|
681
|
+
transform_config.truncate,
|
|
682
|
+
False,
|
|
683
|
+
)
|
|
684
|
+
custom_embeddings_dict = dict(
|
|
685
|
+
zip(
|
|
686
|
+
extracted_custom_content.loc[valid_custom_content_mask].index,
|
|
687
|
+
custom_content_embeddings.get("embeddings", []),
|
|
688
|
+
)
|
|
689
|
+
)
|
|
690
|
+
else:
|
|
691
|
+
custom_embeddings_dict = {}
|
|
692
|
+
|
|
693
|
+
combined_df = combined_df.apply(
|
|
694
|
+
_add_custom_embeddings, embeddings=custom_embeddings_dict, result_target_field=result_target_field, axis=1
|
|
695
|
+
)
|
|
696
|
+
|
|
615
697
|
return combined_df, {"trace_info": execution_trace_log}
|
|
@@ -254,22 +254,29 @@ else:
|
|
|
254
254
|
file = None
|
|
255
255
|
try:
|
|
256
256
|
for file in paths:
|
|
257
|
+
if thread_stop.is_set():
|
|
258
|
+
return
|
|
257
259
|
if isinstance(file, tuple):
|
|
258
260
|
video_file, audio_file = file
|
|
261
|
+
if thread_stop.is_set():
|
|
262
|
+
return
|
|
259
263
|
with open(video_file, "rb") as f:
|
|
260
264
|
video = f.read()
|
|
265
|
+
if thread_stop.is_set():
|
|
266
|
+
return
|
|
261
267
|
with open(audio_file, "rb") as f:
|
|
262
268
|
audio = f.read()
|
|
263
269
|
queue.put((video, audio))
|
|
264
270
|
else:
|
|
265
|
-
if thread_stop:
|
|
271
|
+
if thread_stop.is_set():
|
|
266
272
|
return
|
|
267
273
|
with open(file, "rb") as f:
|
|
268
274
|
queue.put(f.read())
|
|
269
275
|
except Exception as e:
|
|
270
276
|
logging.error(f"Error processing file {file}: {e}")
|
|
271
277
|
queue.put(RuntimeError(f"Error processing file {file}: {e}"))
|
|
272
|
-
|
|
278
|
+
finally:
|
|
279
|
+
queue.put(StopIteration)
|
|
273
280
|
|
|
274
281
|
class DataLoader:
|
|
275
282
|
"""
|
|
@@ -290,7 +297,7 @@ else:
|
|
|
290
297
|
):
|
|
291
298
|
interface = interface if interface else MediaInterface()
|
|
292
299
|
self.thread = None
|
|
293
|
-
self.thread_stop =
|
|
300
|
+
self.thread_stop = threading.Event()
|
|
294
301
|
self.queue = queue.Queue(size)
|
|
295
302
|
self.path = Path(path)
|
|
296
303
|
self.output_dir = output_dir
|
|
@@ -323,16 +330,20 @@ else:
|
|
|
323
330
|
Reset the iterator by stopping the thread and clearing the queue.
|
|
324
331
|
"""
|
|
325
332
|
if self.thread:
|
|
326
|
-
self.thread_stop
|
|
333
|
+
self.thread_stop.set()
|
|
327
334
|
self.thread.join()
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
self.queue.
|
|
335
|
+
self.thread = None
|
|
336
|
+
try:
|
|
337
|
+
while True:
|
|
338
|
+
self.queue.get_nowait()
|
|
339
|
+
except Exception:
|
|
340
|
+
pass
|
|
341
|
+
finally:
|
|
342
|
+
self.thread_stop.clear()
|
|
332
343
|
|
|
333
344
|
def __iter__(self):
|
|
334
345
|
self.stop()
|
|
335
|
-
self.thread_stop
|
|
346
|
+
self.thread_stop.clear()
|
|
336
347
|
self.thread = threading.Thread(
|
|
337
348
|
target=load_data,
|
|
338
349
|
args=(
|
|
@@ -49,6 +49,68 @@ def _resize_image_opencv(
|
|
|
49
49
|
return cv2.resize(array, target_size, interpolation=interpolation)
|
|
50
50
|
|
|
51
51
|
|
|
52
|
+
def rgba_to_rgb_white_bg(rgba_image):
|
|
53
|
+
"""
|
|
54
|
+
Convert RGBA image to RGB by blending with a white background.
|
|
55
|
+
|
|
56
|
+
This function properly handles transparency by alpha-blending transparent
|
|
57
|
+
and semi-transparent pixels with a white background, producing visually
|
|
58
|
+
accurate results that match how the image would appear when displayed.
|
|
59
|
+
|
|
60
|
+
Parameters
|
|
61
|
+
----------
|
|
62
|
+
rgba_image : numpy.ndarray
|
|
63
|
+
Input image array with shape (height, width, 4) where the channels
|
|
64
|
+
are Red, Green, Blue, Alpha. Alpha values can be in range [0, 1]
|
|
65
|
+
(float) or [0, 255] (uint8).
|
|
66
|
+
|
|
67
|
+
Returns
|
|
68
|
+
-------
|
|
69
|
+
numpy.ndarray
|
|
70
|
+
RGB image array with shape (height, width, 3) and dtype uint8.
|
|
71
|
+
Values are in range [0, 255] representing Red, Green, Blue channels.
|
|
72
|
+
|
|
73
|
+
Notes
|
|
74
|
+
-----
|
|
75
|
+
The alpha blending formula used is:
|
|
76
|
+
RGB_out = RGB_in * alpha + background * (1 - alpha)
|
|
77
|
+
|
|
78
|
+
Where background is white (255, 255, 255).
|
|
79
|
+
|
|
80
|
+
For pixels with alpha = 1.0 (fully opaque), the original RGB values
|
|
81
|
+
are preserved. For pixels with alpha = 0.0 (fully transparent), the
|
|
82
|
+
result is white. Semi-transparent pixels are blended proportionally.
|
|
83
|
+
|
|
84
|
+
Examples
|
|
85
|
+
--------
|
|
86
|
+
>>> import numpy as np
|
|
87
|
+
>>> # Create a sample RGBA image with some transparency
|
|
88
|
+
>>> rgba = np.random.randint(0, 256, (100, 100, 4), dtype=np.uint8)
|
|
89
|
+
>>> rgb = rgba_to_rgb_white_bg(rgba)
|
|
90
|
+
>>> print(rgb.shape) # (100, 100, 3)
|
|
91
|
+
>>> print(rgb.dtype) # uint8
|
|
92
|
+
|
|
93
|
+
>>> # Example with float alpha values [0, 1]
|
|
94
|
+
>>> rgba_float = np.random.rand(50, 50, 4).astype(np.float32)
|
|
95
|
+
>>> rgb_float = rgba_to_rgb_white_bg(rgba_float)
|
|
96
|
+
>>> print(rgb_float.dtype) # uint8
|
|
97
|
+
"""
|
|
98
|
+
# Extract RGB and alpha channels
|
|
99
|
+
rgb = rgba_image[:, :, :3] # RGB channels (H, W, 3)
|
|
100
|
+
alpha = rgba_image[:, :, 3:4] # Alpha channel (H, W, 1)
|
|
101
|
+
|
|
102
|
+
# Normalize alpha to [0, 1] range if it's in [0, 255] range
|
|
103
|
+
if alpha.max() > 1.0:
|
|
104
|
+
alpha = alpha / 255.0
|
|
105
|
+
|
|
106
|
+
# Alpha blend with white background using the formula:
|
|
107
|
+
# result = foreground * alpha + background * (1 - alpha)
|
|
108
|
+
rgb_image = rgb * alpha + 255 * (1 - alpha)
|
|
109
|
+
|
|
110
|
+
# Convert to uint8 format for standard image representation
|
|
111
|
+
return rgb_image.astype(np.uint8)
|
|
112
|
+
|
|
113
|
+
|
|
52
114
|
def scale_image_to_encoding_size(
|
|
53
115
|
base64_image: str, max_base64_size: int = 180_000, initial_reduction: float = 0.9, format: str = "PNG", **kwargs
|
|
54
116
|
) -> Tuple[str, Tuple[int, int]]:
|
|
@@ -93,7 +155,7 @@ def scale_image_to_encoding_size(
|
|
|
93
155
|
|
|
94
156
|
# Check initial size
|
|
95
157
|
if len(base64_image) <= max_base64_size:
|
|
96
|
-
return
|
|
158
|
+
return numpy_to_base64(img_array, format=format, **kwargs), original_size
|
|
97
159
|
|
|
98
160
|
# Initial reduction step
|
|
99
161
|
reduction_step = initial_reduction
|
|
@@ -621,6 +683,10 @@ def base64_to_numpy(base64_string: str) -> np.ndarray:
|
|
|
621
683
|
if img is None:
|
|
622
684
|
raise ValueError("OpenCV failed to decode image")
|
|
623
685
|
|
|
686
|
+
# Convert 4 channel to 3 channel if necessary
|
|
687
|
+
if img.shape[2] == 4:
|
|
688
|
+
img = rgba_to_rgb_white_bg(img)
|
|
689
|
+
|
|
624
690
|
# Convert BGR to RGB for consistent processing (OpenCV loads as BGR)
|
|
625
691
|
# Only convert if it's a 3-channel color image
|
|
626
692
|
if img.ndim == 3 and img.shape[2] == 3:
|