nv-ingest-api 2025.7.14.dev20250714__py3-none-any.whl → 2025.7.16.dev20250716__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic.
- nv_ingest_api/internal/enums/common.py +6 -0
- nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +2 -1
- nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +32 -20
- nv_ingest_api/internal/extract/pdf/engines/pdfium.py +25 -1
- nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +1 -0
- nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +7 -12
- nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +5 -0
- nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +4 -0
- nv_ingest_api/internal/transform/embed_text.py +103 -12
- nv_ingest_api/util/image_processing/transforms.py +335 -82
- nv_ingest_api/util/metadata/aggregators.py +4 -1
- nv_ingest_api/util/pdf/pdfium.py +5 -13
- {nv_ingest_api-2025.7.14.dev20250714.dist-info → nv_ingest_api-2025.7.16.dev20250716.dist-info}/METADATA +2 -1
- {nv_ingest_api-2025.7.14.dev20250714.dist-info → nv_ingest_api-2025.7.16.dev20250716.dist-info}/RECORD +17 -17
- {nv_ingest_api-2025.7.14.dev20250714.dist-info → nv_ingest_api-2025.7.16.dev20250716.dist-info}/WHEEL +0 -0
- {nv_ingest_api-2025.7.14.dev20250714.dist-info → nv_ingest_api-2025.7.16.dev20250716.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-2025.7.14.dev20250714.dist-info → nv_ingest_api-2025.7.16.dev20250716.dist-info}/top_level.txt +0 -0
nv_ingest_api/internal/enums/common.py
CHANGED
@@ -52,6 +52,8 @@ class ContentDescriptionEnum(str, Enum):
         Description for image extracted from PDF document.
     PDF_INFOGRAPHIC : str
         Description for structured infographic extracted from PDF document.
+    PDF_PAGE_IMAGE : str
+        Description for a full-page image rendered from a PDF document.
     PDF_TABLE : str
         Description for structured table extracted from PDF document.
     PDF_TEXT : str
@@ -70,6 +72,7 @@ class ContentDescriptionEnum(str, Enum):
     PDF_CHART: str = "Structured chart extracted from PDF document."
     PDF_IMAGE: str = "Image extracted from PDF document."
     PDF_INFOGRAPHIC: str = "Structured infographic extracted from PDF document."
+    PDF_PAGE_IMAGE: str = "Full-page image rendered from a PDF document."
     PDF_TABLE: str = "Structured table extracted from PDF document."
     PDF_TEXT: str = "Unstructured text from PDF document."
     PPTX_IMAGE: str = "Image extracted from PPTX presentation."
@@ -94,6 +97,8 @@ class ContentTypeEnum(str, Enum):
         Represents image content.
     INFO_MSG : str
         Represents an informational message.
+    PAGE_IMAGE : str
+        Represents a full-page image rendered from a document.
     STRUCTURED : str
         Represents structured content.
     TEXT : str
@@ -111,6 +116,7 @@ class ContentTypeEnum(str, Enum):
     INFOGRAPHIC: str = "infographic"
     INFO_MSG: str = "info_message"
     NONE: str = "none"
+    PAGE_IMAGE: str = "page_image"
     STRUCTURED: str = "structured"
     TABLE: str = "table"
     TEXT: str = "text"
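The new PAGE_IMAGE members above are what the rest of this release keys on when it emits full-page images. A quick illustrative check, assuming the package is importable, of how the new member serializes:

from nv_ingest_api.internal.enums.common import ContentTypeEnum

# ContentTypeEnum is a str-valued Enum, so the member compares equal to its string value,
# which is what later appears in content_metadata["subtype"] for full-page images.
assert ContentTypeEnum.PAGE_IMAGE == "page_image"
assert ContentTypeEnum.PAGE_IMAGE.value == "page_image"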
nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py
CHANGED
@@ -40,6 +40,7 @@ from nv_ingest_api.internal.schemas.meta.metadata_schema import validate_metadat
 from nv_ingest_api.internal.primitives.nim.model_interface.yolox import (
     YOLOX_PAGE_IMAGE_PREPROC_WIDTH,
     YOLOX_PAGE_IMAGE_PREPROC_HEIGHT,
+    YOLOX_PAGE_IMAGE_FORMAT,
 )
 from nv_ingest_api.internal.schemas.extract.extract_pdf_schema import NemoRetrieverParseConfigSchema
 from nv_ingest_api.util.metadata.aggregators import (
@@ -355,7 +356,7 @@ def nemoretriever_parse_extractor(
             img_numpy = crop_image(page_image, transformed_bbox)

             if img_numpy is not None:
-                base64_img = numpy_to_base64(img_numpy)
+                base64_img = numpy_to_base64(img_numpy, format=YOLOX_PAGE_IMAGE_FORMAT)
                 image = Base64Image(
                     image=base64_img,
                     bbox=transformed_bbox,
nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py
CHANGED
@@ -4,20 +4,21 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.

 import base64
+import inspect
 import io
-
-import pandas as pd
-from typing import Any, Dict, List, Optional
 import logging
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Optional

-from nv_ingest_api.internal.extract.pdf.engines import (
-    adobe_extractor,
-    llama_parse_extractor,
-    nemoretriever_parse_extractor,
-    pdfium_extractor,
-    tika_extractor,
-    unstructured_io_extractor,
-)
+import pandas as pd
+from nv_ingest_api.internal.extract.pdf.engines import adobe_extractor
+from nv_ingest_api.internal.extract.pdf.engines import llama_parse_extractor
+from nv_ingest_api.internal.extract.pdf.engines import nemoretriever_parse_extractor
+from nv_ingest_api.internal.extract.pdf.engines import pdfium_extractor
+from nv_ingest_api.internal.extract.pdf.engines import tika_extractor
+from nv_ingest_api.internal.extract.pdf.engines import unstructured_io_extractor
 from nv_ingest_api.util.exception_handlers.decorators import unified_exception_handler

 # Import extraction functions for different engines.
@@ -43,6 +44,7 @@ def _work_extract_pdf(
     extract_infographics: bool,
     extract_tables: bool,
     extract_charts: bool,
+    extract_page_as_image: bool,
     extractor_config: dict,
     execution_trace_log=None,
 ) -> Any:
@@ -52,17 +54,25 @@ def _work_extract_pdf(

     extract_method = extractor_config["extract_method"]
     extractor_fn = EXTRACTOR_LOOKUP.get(extract_method, pdfium_extractor)
-
-
-
-
-
-
-
-
-
+
+    extractor_fn_args = dict(
+        pdf_stream=pdf_stream,
+        extract_text=extract_text,
+        extract_images=extract_images,
+        extract_infographics=extract_infographics,
+        extract_tables=extract_tables,
+        extract_charts=extract_charts,
+        extractor_config=extractor_config,
+        execution_trace_log=execution_trace_log,
     )

+    if "extract_page_as_image" in inspect.signature(extractor_fn).parameters:
+        extractor_fn_args["extract_page_as_image"] = extract_page_as_image
+    elif extract_page_as_image:
+        logger.warning(f"`extract_page_as_image` is set to True, but {extract_method} does not support it.")
+
+    return extractor_fn(**extractor_fn_args)
+

 @unified_exception_handler
 def _orchestrate_row_extraction(
@@ -97,6 +107,7 @@ def _orchestrate_row_extraction(
         extract_tables = params.pop("extract_tables", False)
         extract_charts = params.pop("extract_charts", False)
         extract_infographics = params.pop("extract_infographics", False)
+        extract_page_as_image = params.pop("extract_page_as_image", False)
         extract_method = params.get("extract_method", "pdfium")
     except KeyError as e:
         raise ValueError(f"Missing required extraction flag: {e}")
@@ -137,6 +148,7 @@ def _orchestrate_row_extraction(
         extract_text=extract_text,
         extract_images=extract_images,
         extract_infographics=extract_infographics,
+        extract_page_as_image=extract_page_as_image,
         extract_tables=extract_tables,
         extract_charts=extract_charts,
         extractor_config=extractor_config,
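The `_work_extract_pdf` change above only forwards the new `extract_page_as_image` flag to engines whose signature declares it, and warns otherwise. A minimal self-contained sketch of that capability check; the two extractor functions here are hypothetical stand-ins, not the package's real engines:

import inspect
import logging

logger = logging.getLogger(__name__)


def new_style_extractor(pdf_stream, extract_page_as_image=False):
    # Hypothetical engine that understands the new flag.
    return {"page_as_image": extract_page_as_image}


def legacy_extractor(pdf_stream):
    # Hypothetical engine that predates the flag.
    return {}


def dispatch(extractor_fn, pdf_stream, extract_page_as_image):
    kwargs = {"pdf_stream": pdf_stream}
    # Forward the flag only when the engine's signature accepts it.
    if "extract_page_as_image" in inspect.signature(extractor_fn).parameters:
        kwargs["extract_page_as_image"] = extract_page_as_image
    elif extract_page_as_image:
        logger.warning("extract_page_as_image requested, but this engine does not support it.")
    return extractor_fn(**kwargs)


print(dispatch(new_style_extractor, b"%PDF-...", True))  # flag forwarded
print(dispatch(legacy_extractor, b"%PDF-...", True))     # warning logged, flag dropped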
nv_ingest_api/internal/extract/pdf/engines/pdfium.py
CHANGED
@@ -24,16 +24,19 @@ import numpy as np
 import pandas as pd
 import pypdfium2 as libpdfium

+from nv_ingest_api.internal.enums.common import ContentTypeEnum
 from nv_ingest_api.internal.primitives.nim.default_values import YOLOX_MAX_BATCH_SIZE
 from nv_ingest_api.internal.primitives.nim.model_interface.yolox import (
     YOLOX_PAGE_IMAGE_PREPROC_WIDTH,
     YOLOX_PAGE_IMAGE_PREPROC_HEIGHT,
+    YOLOX_PAGE_IMAGE_FORMAT,
     get_yolox_model_name,
     YoloxPageElementsModelInterface,
 )
 from nv_ingest_api.internal.schemas.extract.extract_pdf_schema import PDFiumConfigSchema
 from nv_ingest_api.internal.enums.common import TableFormatEnum, TextTypeEnum, AccessLevelEnum
 from nv_ingest_api.util.metadata.aggregators import (
+    construct_image_metadata_from_base64,
     construct_image_metadata_from_pdf_image,
     extract_pdf_metadata,
     construct_text_metadata,
@@ -46,6 +49,7 @@ from nv_ingest_api.util.pdf.pdfium import (
     extract_image_like_objects_from_pdfium_page,
 )
 from nv_ingest_api.util.pdf.pdfium import pdfium_pages_to_numpy
+from nv_ingest_api.util.image_processing import scale_image_to_encoding_size
 from nv_ingest_api.util.image_processing.transforms import numpy_to_base64, crop_image

 logger = logging.getLogger(__name__)
@@ -186,7 +190,7 @@ def _extract_page_element_images(
         if cropped is None:
             continue

-        base64_img = numpy_to_base64(cropped)
+        base64_img = numpy_to_base64(cropped, format=YOLOX_PAGE_IMAGE_FORMAT)

         bbox_in_orig_coord = (
             int(w1) - pad_width,
@@ -384,6 +388,7 @@ def pdfium_extractor(
     extract_infographics: bool,
     extract_tables: bool,
     extract_charts: bool,
+    extract_page_as_image: bool,
     extractor_config: dict,
     execution_trace_log: Optional[List[Any]] = None,
 ) -> pd.DataFrame:
@@ -524,6 +529,24 @@ def pdfium_extractor(
             )
             extracted_data.extend(image_data)

+        # Full page image extraction
+        if extract_page_as_image:
+            page_text = _extract_page_text(page)
+            image, _ = pdfium_pages_to_numpy([page], scale_tuple=(16384, 16384), trace_info=execution_trace_log)
+            base64_image = numpy_to_base64(image[0])
+            if len(base64_image) > 2**24 - 1:
+                base64_image, _ = scale_image_to_encoding_size(base64_image, max_base64_size=2**24 - 1)
+            image_meta = construct_image_metadata_from_base64(
+                base64_image,
+                page_idx,
+                page_count,
+                source_metadata,
+                base_unified_metadata,
+                subtype=ContentTypeEnum.PAGE_IMAGE,
+                text=page_text,
+            )
+            extracted_data.append(image_meta)
+
         # If we want tables or charts, rasterize the page and store it
         if extract_tables or extract_charts or extract_infographics:
             image, padding_offsets = pdfium_pages_to_numpy(
@@ -574,6 +597,7 @@ def pdfium_extractor(
                     execution_trace_log=execution_trace_log,
                 )
                 futures.append(future)
+
                 pages_for_tables.clear()

         # Wait for all asynchronous jobs to complete.
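The new full-page branch in `pdfium_extractor` rasterizes the page, base64-encodes it, and shrinks the payload if it exceeds the 2**24 - 1 character cap. A rough usage sketch of that size-capping step, assuming the package is installed; the page array here is a random stand-in for a rendered page:

import numpy as np

from nv_ingest_api.util.image_processing import scale_image_to_encoding_size
from nv_ingest_api.util.image_processing.transforms import numpy_to_base64

MAX_B64_CHARS = 2**24 - 1  # limit used by the full-page branch above

page_array = np.random.randint(0, 255, (2200, 1700, 3), dtype=np.uint8)  # stand-in page render
b64 = numpy_to_base64(page_array)  # PNG by default
if len(b64) > MAX_B64_CHARS:
    b64, new_size = scale_image_to_encoding_size(b64, max_base64_size=MAX_B64_CHARS)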
nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py
CHANGED
@@ -120,6 +120,7 @@ class NemoRetrieverParseModelInterface(ModelInterface):
         logger.debug("Formatting input for HTTP NemoRetrieverParse model")
         # Prepare payload for HTTP request

+        ## TODO: Ask @Edward Kim if we want to switch to JPEG/PNG here
         if "images" in data:
             base64_list = [numpy_to_base64(img) for img in data["images"]]
         else:
nv_ingest_api/internal/primitives/nim/model_interface/yolox.py
CHANGED
@@ -2,9 +2,7 @@
 # All rights reserved.
 # SPDX-License-Identifier: Apache-2.0

-
-import base64
-import io
+import os
 import logging
 import warnings
 from math import log
@@ -20,11 +18,11 @@ import packaging
 import pandas as pd
 import torch
 import torchvision
-from PIL import Image

 from nv_ingest_api.internal.primitives.nim import ModelInterface
 from nv_ingest_api.internal.primitives.nim.model_interface.helpers import get_model_name
 from nv_ingest_api.util.image_processing import scale_image_to_encoding_size
+from nv_ingest_api.util.image_processing.transforms import numpy_to_base64

 logger = logging.getLogger(__name__)

@@ -35,6 +33,7 @@ YOLOX_PAGE_MIN_SCORE = 0.1
 YOLOX_PAGE_NIM_MAX_IMAGE_SIZE = 512_000
 YOLOX_PAGE_IMAGE_PREPROC_HEIGHT = 1024
 YOLOX_PAGE_IMAGE_PREPROC_WIDTH = 1024
+YOLOX_PAGE_IMAGE_FORMAT = os.getenv("YOLOX_PAGE_IMAGE_FORMAT", "PNG")

 # yolox-page-elements-v1 contants
 YOLOX_PAGE_V1_NUM_CLASSES = 4
@@ -239,15 +238,11 @@ class YoloxModelInterfaceBase(ModelInterface):
         # Convert to uint8 if needed.
         if image.dtype != np.uint8:
             image = (image * 255).astype(np.uint8)
-        # Convert the numpy array to a PIL Image.
-        image_pil = Image.fromarray(image)
-        original_size = image_pil.size
-
-        # Save the image to a buffer and encode to base64.
-        buffered = io.BytesIO()
-        image_pil.save(buffered, format="PNG")
-        image_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

+        # Get original size directly from numpy array (width, height)
+        original_size = (image.shape[1], image.shape[0])
+        # Convert numpy array directly to base64 using OpenCV
+        image_b64 = numpy_to_base64(image, format=YOLOX_PAGE_IMAGE_FORMAT)
         # Scale the image if necessary.
         scaled_image_b64, new_size = scale_image_to_encoding_size(
             image_b64, max_base64_size=self.nim_max_image_size
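Since `YOLOX_PAGE_IMAGE_FORMAT` is read from the environment at import time, the encoding used for page-element crops can be switched without code changes. A small sketch, assuming the package is installed:

import os

# Must be set before the yolox module is first imported, because the constant
# is resolved once via os.getenv at import time.
os.environ["YOLOX_PAGE_IMAGE_FORMAT"] = "JPEG"

from nv_ingest_api.internal.primitives.nim.model_interface.yolox import YOLOX_PAGE_IMAGE_FORMAT

print(YOLOX_PAGE_IMAGE_FORMAT)  # -> "JPEG" (defaults to "PNG" when unset)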
nv_ingest_api/internal/schemas/meta/ingest_job_schema.py
CHANGED
@@ -107,6 +107,10 @@ class IngestTaskEmbedSchema(BaseModelNoExt):
     model_name: Optional[str] = None
     api_key: Optional[str] = None
     filter_errors: bool = False
+    text_elements_modality: Optional[str] = None
+    image_elements_modality: Optional[str] = None
+    structured_elements_modality: Optional[str] = None
+    audio_elements_modality: Optional[str] = None


 class IngestTaskVdbUploadSchema(BaseModelNoExt):
@@ -195,6 +199,7 @@ class IngestTaskSchema(BaseModelNoExt):
         validated_task_properties = expected_schema_cls(**task_properties)
         values["type"] = task_type  # ensure type is now always the enum
         values["task_properties"] = validated_task_properties
+
         return values

     @field_validator("type", mode="before")
nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py
CHANGED
@@ -22,5 +22,9 @@ class TextEmbeddingSchema(BaseModel):
     input_type: str = Field(default="passage")
     raise_on_failure: bool = Field(default=False)
     truncate: str = Field(default="END")
+    text_elements_modality: str = Field(default="text")
+    image_elements_modality: str = Field(default="text")
+    structured_elements_modality: str = Field(default="text")
+    audio_elements_modality: str = Field(default="text")

     model_config = ConfigDict(extra="forbid")
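Together, the two schema changes let a job request per-element-type embedding modalities, with the `TextEmbeddingSchema` defaults ("text") used as fallbacks. A hypothetical embed-task payload exercising the new fields; the field names come from `IngestTaskEmbedSchema` above, while the task `type` string and the model pairing are illustrative assumptions:

task = {
    "type": "embed",  # assumed task-type string for IngestTaskEmbedSchema
    "task_properties": {
        "model_name": "llama-3.2-nemoretriever-1b-vlm-embed-v1",
        "text_elements_modality": "text",
        "image_elements_modality": "image",
        "structured_elements_modality": "text_image",
        "audio_elements_modality": "text",
    },
}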
nv_ingest_api/internal/transform/embed_text.py
CHANGED
@@ -4,6 +4,7 @@

 import logging
 from concurrent.futures import ThreadPoolExecutor
+from functools import partial
 from typing import Any, Dict, Tuple, Optional, Iterable, List

 import pandas as pd
@@ -19,6 +20,9 @@ from nv_ingest_api.util.schema.schema_validator import validate_schema
 logger = logging.getLogger(__name__)


+MULTI_MODAL_MODELS = ["llama-3.2-nemoretriever-1b-vlm-embed-v1"]
+
+
 # ------------------------------------------------------------------------------
 # Asynchronous Embedding Requests
 # ------------------------------------------------------------------------------
@@ -33,6 +37,7 @@ def _make_async_request(
     input_type: str,
     truncate: str,
     filter_errors: bool,
+    modalities: Optional[List[str]] = None,
 ) -> list:
     """
     Interacts directly with the NIM embedding service to calculate embeddings for a batch of prompts.
@@ -74,11 +79,18 @@ def _make_async_request(
             base_url=embedding_nim_endpoint,
         )

+        extra_body = {
+            "input_type": input_type,
+            "truncate": truncate,
+        }
+        if modalities:
+            extra_body["modality"] = modalities
+
         resp = client.embeddings.create(
             input=prompts,
             model=embedding_model,
             encoding_format=encoding_format,
-            extra_body=
+            extra_body=extra_body,
         )

         response["embedding"] = resp.data
@@ -110,6 +122,7 @@ def _async_request_handler(
     input_type: str,
     truncate: str,
     filter_errors: bool,
+    modalities: Optional[List[str]] = None,
 ) -> List[dict]:
     """
     Gathers calculated embedding results from the NIM embedding service concurrently.
@@ -138,6 +151,9 @@ def _async_request_handler(
     List[dict]
         A list of response dictionaries from the embedding service.
     """
+    if modalities is None:
+        modalities = [None] * len(prompts)
+
     with ThreadPoolExecutor() as executor:
         futures = [
             executor.submit(
@@ -150,8 +166,9 @@ def _async_request_handler(
                 input_type=input_type,
                 truncate=truncate,
                 filter_errors=filter_errors,
+                modalities=modality_batch,
             )
-            for prompt_batch in prompts
+            for prompt_batch, modality_batch in zip(prompts, modalities)
         ]
         results = [future.result() for future in futures]

@@ -167,6 +184,7 @@ def _async_runner(
     input_type: str,
     truncate: str,
     filter_errors: bool,
+    modalities: Optional[List[str]] = None,
 ) -> dict:
     """
     Concurrently launches all NIM embedding requests and flattens the results.
@@ -204,6 +222,7 @@ def _async_runner(
         input_type,
         truncate,
         filter_errors,
+        modalities=modalities,
     )

     flat_results = {"embeddings": [], "info_msgs": []}
@@ -263,7 +282,19 @@ def _add_embeddings(row, embeddings, info_msgs):
     return row


-def _get_pandas_text_content(row):
+def _format_image_input_string(image_b64: Optional[str]) -> str:
+    if not image_b64:
+        return
+    return f"data:image/png;base64,{image_b64}"
+
+
+def _format_text_image_pair_input_string(text: Optional[str], image_b64: Optional[str]) -> str:
+    if (not text) or (not text.strip()) or (not image_b64):
+        return
+    return f"{text.strip()} {_format_image_input_string(image_b64)}"
+
+
+def _get_pandas_text_content(row, modality="text"):
     """
     Extracts text content from a DataFrame row.

@@ -280,7 +311,7 @@ def _get_pandas_text_content(row):
     return row["content"]


-def _get_pandas_table_content(row):
+def _get_pandas_table_content(row, modality="text"):
     """
     Extracts table/chart content from a DataFrame row.

@@ -294,10 +325,19 @@ def _get_pandas_table_content(row):
     str
         The table/chart content from the row.
     """
-
+    if modality == "text":
+        content = row.get("table_metadata", {}).get("table_content")
+    elif modality == "image":
+        content = _format_image_input_string(row.get("content"))
+    elif modality == "text_image":
+        text = row.get("table_metadata", {}).get("table_content")
+        image = row.get("content")
+        content = _format_text_image_pair_input_string(text, image)
+
+    return content


-def _get_pandas_image_content(row):
+def _get_pandas_image_content(row, modality="text"):
     """
     Extracts image caption content from a DataFrame row.

@@ -311,10 +351,28 @@ def _get_pandas_image_content(row):
     str
         The image caption from the row.
     """
-
+    subtype = row.get("content_metadata", {}).get("subtype")
+    if modality == "text":
+        if subtype == "page_image":
+            content = row.get("image_metadata", {}).get("text")
+        else:
+            content = row.get("image_metadata", {}).get("caption")
+    elif modality == "image":
+        content = _format_image_input_string(row.get("content"))
+    elif modality == "text_image":
+        if subtype == "page_image":
+            text = row.get("image_metadata", {}).get("text")
+        else:
+            text = row.get("image_metadata", {}).get("caption")
+        image = row.get("content")
+        content = _format_text_image_pair_input_string(text, image)

+    # A workaround to save memory.
+    row["content"] = ""
+    return content

-def _get_pandas_audio_content(row):
+
+def _get_pandas_audio_content(row, modality="text"):
     """
     A pandas UDF used to select extracted audio transcription to be used to create embeddings.
     """
@@ -408,6 +466,23 @@ def _concatenate_extractions_pandas(
 # ------------------------------------------------------------------------------


+def does_model_support_multimodal_embeddings(model: str) -> bool:
+    """
+    Checks if a given model supports multi-modal embeddings.
+
+    Parameters
+    ----------
+    model : str
+        The name of the model.
+
+    Returns
+    -------
+    bool
+        True if the model supports multi-modal embeddings, False otherwise.
+    """
+    return model in MULTI_MODAL_MODELS
+
+
 def transform_create_text_embeddings_internal(
     df_transform_ledger: pd.DataFrame,
     task_config: Dict[str, Any],
@@ -460,6 +535,15 @@ def transform_create_text_embeddings_internal(
         ContentTypeEnum.AUDIO: _get_pandas_audio_content,
         ContentTypeEnum.VIDEO: lambda x: None,  # Not supported yet.
     }
+    task_type_to_modality = {
+        ContentTypeEnum.TEXT: task_config.get("text_elements_modality") or transform_config.text_elements_modality,
+        ContentTypeEnum.STRUCTURED: (
+            task_config.get("structured_elements_modality") or transform_config.structured_elements_modality
+        ),
+        ContentTypeEnum.IMAGE: task_config.get("image_elements_modality") or transform_config.image_elements_modality,
+        ContentTypeEnum.AUDIO: task_config.get("audio_elements_modality") or transform_config.audio_elements_modality,
+        ContentTypeEnum.VIDEO: lambda x: None,  # Not supported yet.
+    }

     def _content_type_getter(row):
         return row["content_metadata"]["type"]
@@ -480,7 +564,7 @@ def transform_create_text_embeddings_internal(
         # Extract content and normalize empty or non-str to None
         extracted_content = (
             df_content["metadata"]
-            .apply(content_getter)
+            .apply(partial(content_getter, modality=task_type_to_modality[content_type]))
            .apply(lambda x: x.strip() if isinstance(x, str) and x.strip() else None)
         )
         df_content["_content"] = extracted_content
@@ -488,9 +572,15 @@ def transform_create_text_embeddings_internal(
         # Prepare batches for only valid (non-None) content
         valid_content_mask = df_content["_content"].notna()
         if valid_content_mask.any():
-
-
-
+            filtered_content_list = df_content.loc[valid_content_mask, "_content"].tolist()
+            filtered_content_batches = _generate_batches(filtered_content_list, batch_size=transform_config.batch_size)
+
+            if model_name in MULTI_MODAL_MODELS:
+                modality_list = [task_type_to_modality[content_type]] * len(filtered_content_list)
+                modality_batches = _generate_batches(modality_list, batch_size=transform_config.batch_size)
+            else:
+                modality_batches = None
+
             content_embeddings = _async_runner(
                 filtered_content_batches,
                 api_key,
@@ -500,6 +590,7 @@ def transform_create_text_embeddings_internal(
                 transform_config.input_type,
                 transform_config.truncate,
                 False,
+                modalities=modality_batches,
             )
             # Build a simple row index -> embedding map
             embeddings_dict = dict(
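To make the three modality values concrete, here is a small self-contained rendering of the content selection that `_get_pandas_table_content` performs above, applied to a toy row dict (the real code operates on the ledger's metadata column and uses the private helpers shown in the diff):

from typing import Optional


def format_image_input(image_b64: Optional[str]) -> Optional[str]:
    # Mirrors _format_image_input_string: wrap base64 content as a data URI.
    return f"data:image/png;base64,{image_b64}" if image_b64 else None


def table_content_for_modality(row: dict, modality: str = "text") -> Optional[str]:
    if modality == "text":
        return row.get("table_metadata", {}).get("table_content")
    if modality == "image":
        return format_image_input(row.get("content"))
    if modality == "text_image":
        text = row.get("table_metadata", {}).get("table_content")
        image = row.get("content")
        if not text or not text.strip() or not image:
            return None
        return f"{text.strip()} {format_image_input(image)}"
    return None


row = {"content": "iVBORw0KGgo...", "table_metadata": {"table_content": "| a | b |"}}
print(table_content_for_modality(row, "text_image"))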
nv_ingest_api/util/image_processing/transforms.py
CHANGED
@@ -2,29 +2,55 @@
 # All rights reserved.
 # SPDX-License-Identifier: Apache-2.0

-import base64
-import io
 import logging
-from io import BytesIO
 from math import ceil
 from math import floor
 from typing import Optional
 from typing import Tuple

+import cv2
 import numpy as np
+from io import BytesIO
 from PIL import Image
-from PIL import UnidentifiedImageError

 from nv_ingest_api.util.converters import bytetools

+# Configure OpenCV to use a single thread for image processing
+cv2.setNumThreads(1)
 DEFAULT_MAX_WIDTH = 1024
 DEFAULT_MAX_HEIGHT = 1280

+# Workaround for PIL.Image.DecompressionBombError
+Image.MAX_IMAGE_PIXELS = None
+
 logger = logging.getLogger(__name__)


+def _resize_image_opencv(
+    array: np.ndarray, target_size: Tuple[int, int], interpolation=cv2.INTER_LANCZOS4
+) -> np.ndarray:
+    """
+    Resizes a NumPy array representing an image using OpenCV.
+
+    Parameters
+    ----------
+    array : np.ndarray
+        The input image as a NumPy array.
+    target_size : Tuple[int, int]
+        The target size as (width, height).
+    interpolation : int, optional
+        OpenCV interpolation method. Defaults to cv2.INTER_LANCZOS4.
+
+    Returns
+    -------
+    np.ndarray
+        The resized image as a NumPy array.
+    """
+    return cv2.resize(array, target_size, interpolation=interpolation)
+
+
 def scale_image_to_encoding_size(
-    base64_image: str, max_base64_size: int = 180_000, initial_reduction: float = 0.9
+    base64_image: str, max_base64_size: int = 180_000, initial_reduction: float = 0.9, format: str = "PNG", **kwargs
 ) -> Tuple[str, Tuple[int, int]]:
     """
     Decodes a base64-encoded image, resizes it if needed, and re-encodes it as base64.
@@ -38,12 +64,19 @@ def scale_image_to_encoding_size(
         Maximum allowable size for the base64-encoded image, by default 180,000 characters.
     initial_reduction : float, optional
         Initial reduction step for resizing, by default 0.9.
+    format : str, optional
+        The image format to use for encoding. Supported formats are "PNG" and "JPEG".
+        Defaults to "PNG".
+    **kwargs
+        Additional keyword arguments passed to the format-specific encoding function.
+        For JPEG: quality (int, default=100) - JPEG quality (1-100).
+        For PNG: compression (int, default=3) - PNG compression level (0-9).

     Returns
     -------
     Tuple[str, Tuple[int, int]]
         A tuple containing:
-        - Base64-encoded
+        - Base64-encoded image string in the specified format, resized if necessary.
         - The new size as a tuple (width, height).

     Raises
@@ -52,12 +85,11 @@ def scale_image_to_encoding_size(
         If the image cannot be resized below the specified max_base64_size.
     """
     try:
-        # Decode the base64 image
-
-        img = Image.open(io.BytesIO(image_data)).convert("RGB")
+        # Decode the base64 image using OpenCV (returns RGB format)
+        img_array = base64_to_numpy(base64_image)

-        # Initial image size
-        original_size =
+        # Initial image size (height, width, channels) -> (width, height)
+        original_size = (img_array.shape[1], img_array.shape[0])

         # Check initial size
         if len(base64_image) <= max_base64_size:
@@ -66,23 +98,24 @@ def scale_image_to_encoding_size(
         # Initial reduction step
         reduction_step = initial_reduction
         new_size = original_size
+        current_img = img_array.copy()
+        original_width, original_height = original_size
+
         while len(base64_image) > max_base64_size:
-
-            new_size
+            new_size = (int(original_width * reduction_step), int(original_height * reduction_step))
+            if new_size[0] < 1 or new_size[1] < 1:
+                raise ValueError("Image cannot be resized further without becoming too small.")

-
-
-
-
+            # Resize the image using OpenCV
+            current_img = _resize_image_opencv(img_array, new_size)
+
+            # Re-encode as base64 using the specified format
+            base64_image = numpy_to_base64(current_img, format=format, **kwargs)

             # Adjust the reduction step if necessary
             if len(base64_image) > max_base64_size:
                 reduction_step *= 0.95  # Reduce size further if needed

-            # Safety check
-            if new_size[0] < 1 or new_size[1] < 1:
-                raise Exception("Image cannot be resized further without becoming too small.")
-
         return base64_image, new_size

     except Exception as e:
@@ -90,36 +123,84 @@ def scale_image_to_encoding_size(
         raise


-def
+def _detect_base64_image_format(base64_string: str) -> Optional[str]:
     """
-
+    Detects the format of a base64-encoded image using Pillow.

     Parameters
     ----------
-
+    base64_string : str
         Base64-encoded image string.

     Returns
     -------
-
-        Base64-encoded PNG image string.
+        The detected format ("PNG", "JPEG", "UNKNOWN")
     """
     try:
-
-
-
+        image_bytes = bytetools.bytesfrombase64(base64_string)
+    except Exception as e:
+        logger.error(f"Invalid base64 string: {e}")
+        raise ValueError(f"Invalid base64 string: {e}") from e
+
+    try:
+        with Image.open(BytesIO(image_bytes)) as img:
+            return img.format.upper()
+    except ImportError:
+        raise ImportError("Pillow library not available")
+    except Exception as e:
+        logger.error(f"Error detecting image format: {e}")
+        return "UNKNOWN"
+
+
+def ensure_base64_format(base64_image: str, target_format: str = "PNG", **kwargs) -> str:
+    """
+    Ensures the given base64-encoded image is in the specified format. Converts if necessary.
+    Skips conversion if the image is already in the target format.
+
+    Parameters
+    ----------
+    base64_image : str
+        Base64-encoded image string.
+    target_format : str, optional
+        The target image format. Supported formats are "PNG" and "JPEG". Defaults to "PNG".
+    **kwargs
+        Additional keyword arguments passed to the format-specific encoding function.
+        For JPEG: quality (int, default=100) - JPEG quality (1-100).
+        For PNG: compression (int, default=3) - PNG compression level (0-9).

-
-
-
-
-        image.convert("RGB").save(buffered, format="PNG")
-        base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
+    Returns
+    -------
+    str
+        Base64-encoded image string in the specified format.

+    Raises
+    ------
+    ValueError
+        If there is an error during format conversion.
+    """
+    target_format = target_format.upper()
+    if target_format == "JPG":
+        target_format = "JPEG"
+
+    current_format = _detect_base64_image_format(base64_image)
+    if current_format == "UNKNOWN":
+        raise ValueError(
+            f"Unable to decode image from base64 string: {base64_image}, because current format could not be detected."
+        )
+    if current_format == target_format:
+        logger.debug(f"Image already in {target_format} format, skipping conversion")
         return base64_image
+
+    try:
+        # Decode the base64 image using OpenCV (returns RGB format)
+        img_array = base64_to_numpy(base64_image)
+        # Re-encode in the target format
+        return numpy_to_base64(img_array, format=target_format, **kwargs)
+    except ImportError as e:
+        raise e
     except Exception as e:
-        logger.error(f"Error
-
+        logger.error(f"Error converting image to {target_format} format: {e}")
+        raise ValueError(f"Failed to convert image to {target_format} format: {e}") from e


 def pad_image(
@@ -302,66 +383,193 @@ def normalize_image(
     return output_array


-def
+def _preprocess_numpy_array(array: np.ndarray) -> np.ndarray:
+    """
+    Preprocesses a NumPy array for image encoding by ensuring proper format and data type.
+    Also handles color space conversion for OpenCV encoding.
+
+    Parameters
+    ----------
+    array : np.ndarray
+        The input image as a NumPy array.
+
+    Returns
+    -------
+    np.ndarray
+        The preprocessed array in uint8 format, ready for OpenCV encoding (BGR color order for color images).
+
+    Raises
+    ------
+    ValueError
+        If the input array cannot be converted into a valid image format.
+    """
+    # Check if the array is valid and can be converted to an image
+    try:
+        # If the array represents a grayscale image, drop the redundant axis in
+        # (h, w, 1). cv2 expects (h, w) for grayscale.
+        if array.ndim == 3 and array.shape[2] == 1:
+            array = np.squeeze(array, axis=2)
+
+        # Ensure uint8 data type
+        processed_array = array.astype(np.uint8)
+
+        # OpenCV uses BGR color order, so convert RGB to BGR if needed
+        if processed_array.ndim == 3 and processed_array.shape[2] == 3:
+            # Assume input is RGB and convert to BGR for OpenCV
+            processed_array = cv2.cvtColor(processed_array, cv2.COLOR_RGB2BGR)
+
+        return processed_array
+    except Exception as e:
+        raise ValueError(f"Failed to preprocess NumPy array for image encoding: {e}")
+
+
+def _encode_opencv_jpeg(array: np.ndarray, *, quality: int = 100) -> bytes:
+    """NumPy array -> JPEG bytes using OpenCV."""
+    ok, buf = cv2.imencode(".jpg", array, [int(cv2.IMWRITE_JPEG_QUALITY), quality])
+    if not ok:
+        raise RuntimeError("cv2.imencode failed")
+    return buf.tobytes()
+
+
+def _encode_opencv_png(array: np.ndarray, *, compression: int = 6) -> bytes:
+    """NumPy array -> PNG bytes using OpenCV"""
+    encode_params = [
+        cv2.IMWRITE_PNG_COMPRESSION,
+        compression,
+        cv2.IMWRITE_PNG_STRATEGY,
+        cv2.IMWRITE_PNG_STRATEGY_DEFAULT,
+    ]
+    ok, buf = cv2.imencode(".png", array, encode_params)
+    if not ok:
+        raise RuntimeError("cv2.imencode(.png) failed")
+    return buf.tobytes()
+
+
+def numpy_to_base64_png(array: np.ndarray) -> str:
+    """
+    Converts a preprocessed NumPy array representing an image to a base64-encoded PNG string using OpenCV.
+
+    Parameters
+    ----------
+    array : np.ndarray
+        The preprocessed input image as a NumPy array. Must have a shape compatible with image data.
+
+    Returns
+    -------
+    str
+        The base64-encoded PNG string representation of the input NumPy array.
+
+    Raises
+    ------
+    RuntimeError
+        If there is an issue during the image conversion or base64 encoding process.
+    """
+    try:
+        # Encode to PNG bytes using OpenCV
+        png_bytes = _encode_opencv_png(array)
+
+        # Convert to base64
+        base64_img = bytetools.base64frombytes(png_bytes)
+    except Exception as e:
+        raise RuntimeError(f"Failed to encode image to base64 PNG: {e}")
+
+    return base64_img
+
+
+def numpy_to_base64_jpeg(array: np.ndarray, quality: int = 100) -> str:
+    """
+    Converts a preprocessed NumPy array representing an image to a base64-encoded JPEG string using OpenCV.
+
+    Parameters
+    ----------
+    array : np.ndarray
+        The preprocessed input image as a NumPy array. Must have a shape compatible with image data.
+    quality : int, optional
+        JPEG quality (1-100), by default 100. Higher values mean better quality but larger file size.
+
+    Returns
+    -------
+    str
+        The base64-encoded JPEG string representation of the input NumPy array.
+
+    Raises
+    ------
+    RuntimeError
+        If there is an issue during the image conversion or base64 encoding process.
+    """
+    try:
+        # Encode to JPEG bytes using OpenCV
+        jpeg_bytes = _encode_opencv_jpeg(array, quality=quality)
+
+        # Convert to base64
+        base64_img = bytetools.base64frombytes(jpeg_bytes)
+    except Exception as e:
+        raise RuntimeError(f"Failed to encode image to base64 JPEG: {e}")
+
+    return base64_img
+
+
+def numpy_to_base64(array: np.ndarray, format: str = "PNG", **kwargs) -> str:
     """
     Converts a NumPy array representing an image to a base64-encoded string.

-    The function takes a NumPy array,
-    the image
-    a format that can be converted to a valid image, such as having a shape
-    where C is the number of channels (e.g., 3 for RGB).
+    The function takes a NumPy array, preprocesses it, and then encodes
+    the image in the specified format as a base64 string. The input array is expected
+    to be in a format that can be converted to a valid image, such as having a shape
+    of (H, W, C) where C is the number of channels (e.g., 3 for RGB).

     Parameters
     ----------
     array : np.ndarray
         The input image as a NumPy array. Must have a shape compatible with image data.
+    format : str, optional
+        The image format to use for encoding. Supported formats are "PNG" and "JPEG".
+        Defaults to "PNG".
+    **kwargs
+        Additional keyword arguments passed to the format-specific encoding function.
+        For JPEG: quality (int, default=100) - JPEG quality (1-100).

     Returns
     -------
     str
-        The base64-encoded string representation of the input NumPy array
+        The base64-encoded string representation of the input NumPy array in the specified format.

     Raises
     ------
     ValueError
-        If the input array cannot be converted into a valid image format
+        If the input array cannot be converted into a valid image format, or if an
+        unsupported format is specified.
     RuntimeError
         If there is an issue during the image conversion or base64 encoding process.

     Examples
     --------
     >>> array = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
-    >>> encoded_str = numpy_to_base64(array)
+    >>> encoded_str = numpy_to_base64(array, format="PNG")
     >>> isinstance(encoded_str, str)
     True
+    >>> encoded_str_jpeg = numpy_to_base64(array, format="JPEG", quality=90)
+    >>> isinstance(encoded_str_jpeg, str)
+    True
     """
-    #
-
-    # a grayscale image.
-    if array.ndim == 3 and array.shape[2] == 1:
-        array = np.squeeze(array, axis=2)
+    # Centralized preprocessing of the numpy array
+    processed_array = _preprocess_numpy_array(array)

-
-    try:
-        # Convert the NumPy array to a PIL image
-        pil_image = Image.fromarray(array.astype(np.uint8))
-    except Exception as e:
-        raise ValueError(f"Failed to convert NumPy array to image: {e}")
+    format = format.upper()

-
-
-
-
-
-
-        raise
-
-    return base64_img
+    if format == "PNG":
+        return numpy_to_base64_png(processed_array)
+    elif format == "JPEG" or format == "JPG":
+        quality = kwargs.get("quality", 100)
+        return numpy_to_base64_jpeg(processed_array, quality=quality)
+    else:
+        raise ValueError(f"Unsupported format: {format}. Supported formats are 'PNG' and 'JPEG'.")


 def base64_to_numpy(base64_string: str) -> np.ndarray:
     """
-    Convert a base64-encoded image string to a NumPy array.
+    Convert a base64-encoded image string to a NumPy array using OpenCV.
+    Returns images in RGB format for consistency.

     Parameters
     ----------
@@ -371,37 +579,82 @@ def base64_to_numpy(base64_string: str) -> np.ndarray:
     Returns
     -------
     numpy.ndarray
-        NumPy array representation of the decoded image.
+        NumPy array representation of the decoded image in RGB format (for color images).
+        Grayscale images are returned as-is.

     Raises
     ------
     ValueError
         If the base64 string is invalid or cannot be decoded into an image.
-    ImportError
-        If required libraries are not installed.

     Examples
     --------
     >>> base64_str = '/9j/4AAQSkZJRgABAQAAAQABAAD/2wBD...'
     >>> img_array = base64_to_numpy(base64_str)
+    >>> # img_array is now in RGB format (for color images)
     """
     try:
-        # Decode the base64 string
-
-    except
+        # Decode the base64 string to bytes using bytetools
+        image_bytes = bytetools.bytesfrombase64(base64_string)
+    except Exception as e:
         raise ValueError("Invalid base64 string") from e

+    # Create numpy buffer from bytes and decode using OpenCV
+    buf = np.frombuffer(image_bytes, dtype=np.uint8)
     try:
-
-
-
-
-
-        image
-
+        img = cv2.imdecode(buf, cv2.IMREAD_UNCHANGED)
+        if img is None:
+            raise ValueError("OpenCV failed to decode image")
+
+        # Convert BGR to RGB for consistent processing (OpenCV loads as BGR)
+        # Only convert if it's a 3-channel color image
+        if img.ndim == 3 and img.shape[2] == 3:
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    except ImportError:
+        raise
+    except Exception as e:
         raise ValueError("Unable to decode image from base64 string") from e

-    # Convert
-
+    # Convert to numpy array
+    img = np.array(img)
+    # Assert that 3-channel images are in RGB format after conversion
+    assert img.ndim <= 3, f"Image has unexpected number of dimensions: {img.ndim}"
+    assert img.ndim != 3 or img.shape[2] == 3, f"3-channel image should have 3 channels, got: {img.shape[2]}"
+
+    return img
+
+
+def scale_numpy_image(
+    img_arr: np.ndarray, scale_tuple: Optional[Tuple[int, int]] = None, interpolation=Image.LANCZOS
+) -> np.ndarray:
+    """
+    Scales a NumPy image array using OpenCV with aspect ratio preservation.

-
+    This function provides OpenCV-based image scaling that mimics PIL's thumbnail behavior
+    by maintaining aspect ratio and scaling to fit within the specified dimensions.
+
+    Parameters
+    ----------
+    img_arr : np.ndarray
+        The input image as a NumPy array.
+    scale_tuple : Optional[Tuple[int, int]], optional
+        A tuple (width, height) to resize the image to. If provided, the image
+        will be resized to fit within these dimensions while maintaining aspect ratio
+        (similar to PIL's thumbnail method). Defaults to None.
+    interpolation : int, optional
+        OpenCV interpolation method. Defaults to cv2.INTER_LANCZOS4.
+
+    Returns
+    -------
+    np.ndarray
+        A NumPy array representing the scaled image data.
+    """
+    # Apply scaling using OpenCV if specified
+    # Using PIL for scaling as CV2 seems to lead to different results
+    # TODO: Remove when we move to YOLOX Ensemble Models
+    if scale_tuple:
+        image = Image.fromarray(img_arr)
+        image.thumbnail(scale_tuple, interpolation)
+        img_arr = np.array(image)
+    # Ensure we return a copy
+    return img_arr.copy()
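The rewritten transforms module keeps `numpy_to_base64` and `base64_to_numpy` as the public entry points, with OpenCV doing the encode/decode work. A short round-trip sketch, assuming the package (and its cv2 dependency) is installed:

import numpy as np

from nv_ingest_api.util.image_processing.transforms import (
    base64_to_numpy,
    ensure_base64_format,
    numpy_to_base64,
)

array = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)

png_b64 = numpy_to_base64(array)                              # PNG by default
jpeg_b64 = numpy_to_base64(array, format="JPEG", quality=90)  # lossy, smaller payload

# Decode back to an RGB array, then normalize the encoding to PNG.
decoded = base64_to_numpy(jpeg_b64)
assert decoded.shape == (100, 100, 3)
png_again = ensure_base64_format(jpeg_b64, target_format="PNG")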
nv_ingest_api/util/metadata/aggregators.py
CHANGED
@@ -201,6 +201,8 @@ def construct_image_metadata_from_base64(
     page_count: int,
     source_metadata: Dict[str, Any],
     base_unified_metadata: Dict[str, Any],
+    subtype: None | ContentTypeEnum | str = "",
+    text: str = "",
 ) -> List[Any]:
     """
     Extracts image data from a base64-encoded image string, decodes the image to get
@@ -252,6 +254,7 @@ def construct_image_metadata_from_base64(
             "line": -1,
             "span": -1,
         },
+        "subtype": subtype or "",
     }

     # Construct image metadata
@@ -259,7 +262,7 @@ def construct_image_metadata_from_base64(
         "image_type": DocumentTypeEnum.PNG,
         "structured_image_type": ContentTypeEnum.UNKNOWN,
         "caption": "",
-        "text":
+        "text": text,
         "image_location": bbox,
         "image_location_max_dimensions": (width, height),
         "height": height,
nv_ingest_api/util/pdf/pdfium.py
CHANGED
@@ -7,7 +7,6 @@ from typing import List, Any
 from typing import Optional
 from typing import Tuple

-import PIL
 import numpy as np
 import pypdfium2 as pdfium
 import pypdfium2.raw as pdfium_c
@@ -20,8 +19,9 @@ from nv_ingest_api.util.image_processing.clustering import (
     combine_groups_into_bboxes,
     remove_superset_bboxes,
 )
-from nv_ingest_api.util.image_processing.transforms import pad_image, numpy_to_base64, crop_image
+from nv_ingest_api.util.image_processing.transforms import pad_image, numpy_to_base64, crop_image, scale_numpy_image
 from nv_ingest_api.util.metadata.aggregators import Base64Image
+from nv_ingest_api.internal.primitives.nim.model_interface.yolox import YOLOX_PAGE_IMAGE_FORMAT

 logger = logging.getLogger(__name__)

@@ -176,18 +176,10 @@ def pdfium_pages_to_numpy(
     for idx, page in enumerate(pages):
         # Render the page as a bitmap with the specified scale and rotation
         page_bitmap = page.render(scale=scale, rotation=rotation)
-
-        # Convert the bitmap to a PIL image
-        pil_image = page_bitmap.to_pil()
-
+        img_arr = convert_bitmap_to_corrected_numpy(page_bitmap)
         # Apply scaling using the thumbnail approach if specified
         if scale_tuple:
-
-
-        # Convert the PIL image to a NumPy array and force a full copy,
-        # ensuring the returned array is entirely independent of the original buffer.
-        img_arr = np.array(pil_image).copy()
-
+            img_arr = scale_numpy_image(img_arr, scale_tuple)
         # Apply padding if specified
         if padding_tuple:
             img_arr, (pad_width, pad_height) = pad_image(
@@ -250,7 +242,7 @@ def extract_simple_images_from_pdfium_page(page, max_depth):
     try:
         # Attempt to retrieve the image bitmap
         image_numpy: np.ndarray = pdfium_try_get_bitmap_as_numpy(obj)  # noqa
-        image_base64: str = numpy_to_base64(image_numpy)
+        image_base64: str = numpy_to_base64(image_numpy, format=YOLOX_PAGE_IMAGE_FORMAT)
         image_bbox = obj.get_pos()
         image_size = obj.get_size()
         if image_size[0] < 10 and image_size[1] < 10:
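With the PIL conversion removed from `pdfium_pages_to_numpy`, thumbnail-style downscaling now goes through `scale_numpy_image`. A brief usage sketch, assuming the package is installed; the array is a random stand-in for a rendered page:

import numpy as np

from nv_ingest_api.util.image_processing.transforms import scale_numpy_image

page = np.random.randint(0, 255, (3300, 2550, 3), dtype=np.uint8)  # stand-in page render
thumb = scale_numpy_image(page, scale_tuple=(1024, 1024))
# Aspect ratio is preserved, so the result fits within 1024x1024.
print(thumb.shape)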
{nv_ingest_api-2025.7.14.dev20250714.dist-info → nv_ingest_api-2025.7.16.dev20250716.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nv-ingest-api
-Version: 2025.7.14.dev20250714
+Version: 2025.7.16.dev20250716
 Summary: Python module with core document ingestion functions.
 Author-email: Jeremy Dyer <jdyer@nvidia.com>
 License: Apache License
@@ -217,6 +217,7 @@ Requires-Dist: backoff==2.2.1
 Requires-Dist: pandas>=2.0
 Requires-Dist: pydantic>2.0.0
 Requires-Dist: pydantic-settings>2.0.0
+Requires-Dist: tritonclient
 Dynamic: license-file

 # nv-ingest-api
{nv_ingest_api-2025.7.14.dev20250714.dist-info → nv_ingest_api-2025.7.16.dev20250716.dist-info}/RECORD
CHANGED
@@ -7,7 +7,7 @@ nv_ingest_api/interface/transform.py,sha256=g6YnFR7TpEU0xNtzCvv6kqnFbuCwQ6vRMjjB
 nv_ingest_api/interface/utility.py,sha256=AL4l0cJNvTjG1MAe1YNTk1jbbPED3g4HCewzx6Ffcio,7296
 nv_ingest_api/internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nv_ingest_api/internal/enums/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
-nv_ingest_api/internal/enums/common.py,sha256=
+nv_ingest_api/internal/enums/common.py,sha256=lzDJ35VWfIwlL_Lx_q0dfHUuwEB7CXudHIQAilpjoRw,12611
 nv_ingest_api/internal/extract/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
 nv_ingest_api/internal/extract/audio/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest_api/internal/extract/audio/audio_extraction.py,sha256=_jf_UC_FTqZr-xEpwG8edwBzdDjM01gGhqm9ulOsDcY,6973
@@ -31,11 +31,11 @@ nv_ingest_api/internal/extract/pdf/pdf_extractor.py,sha256=CxtWaD6mql9MEqSdk2CfS
 nv_ingest_api/internal/extract/pdf/engines/__init__.py,sha256=u4GnAZmDKRl0RwYGIRiozIRw70Kybw3A72-lcKFeoTI,582
 nv_ingest_api/internal/extract/pdf/engines/adobe.py,sha256=VT0dEqkU-y2uGkaCqxtKYov_Q8R1028UQVBchgMLca4,17466
 nv_ingest_api/internal/extract/pdf/engines/llama.py,sha256=PpKTqS8jGHBV6mKLGZWwjpfT8ga6Fy8ffrvL-gPAf2c,8182
-nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py,sha256=
-nv_ingest_api/internal/extract/pdf/engines/pdfium.py,sha256=
+nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py,sha256=XNYz4S2tMFBv0KFzXNERrVs-1raxJ_iIIXpBGlJFcD0,22987
+nv_ingest_api/internal/extract/pdf/engines/pdfium.py,sha256=8hUJUdpx6FhOBgabFmGhJiAQdl12kR8YoSbUfN-geOk,23506
 nv_ingest_api/internal/extract/pdf/engines/tika.py,sha256=6GyR2l6EsgNZl9jnYDXLeKNK9Fj2Mw9y2UWDq-eSkOc,3169
 nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py,sha256=jrv2B4VZAH4PevAQrFz965qz8UyXq3rViiOTbGLejec,14908
-nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py,sha256=
+nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py,sha256=4bvN6LsPksLicI6jM0JqbJFiOZNHEcuc8MVVW4XfgV8,5875
 nv_ingest_api/internal/extract/pptx/__init__.py,sha256=HIHfzSig66GT0Uk8qsGBm_f13fKYcPtItBicRUWOOVA,183
 nv_ingest_api/internal/extract/pptx/pptx_extractor.py,sha256=o-0P2dDyRFW37uQi_lKk6-eFozTcZvbq-2Y4I0EBMIY,7749
 nv_ingest_api/internal/extract/pptx/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -55,12 +55,12 @@ nv_ingest_api/internal/primitives/nim/model_interface/cached.py,sha256=b1HX-PY1E
 nv_ingest_api/internal/primitives/nim/model_interface/decorators.py,sha256=qwubkHs4WjnexM6rI0wkjWCsrVNEbA4Wjk2oKL9OYCU,1499
 nv_ingest_api/internal/primitives/nim/model_interface/deplot.py,sha256=TvKdk6PTuI1WNhRmNNrvygaI_DIutkJkDL-XdtLZQac,10787
 nv_ingest_api/internal/primitives/nim/model_interface/helpers.py,sha256=x35a9AyTYxpESQflLo_YnhVOKblQKVen6vGGFaXmNiE,9927
-nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py,sha256=
+nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py,sha256=WysjDZeegclO3mZgVcGOwzWbr8wSI4pWRiYD4iC2EXo,7098
 nv_ingest_api/internal/primitives/nim/model_interface/paddle.py,sha256=rSUPwl5XOrqneoS6aKhatVjrNBg_LhP3nwUWS_aTwz0,17950
 nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py,sha256=5PqD2JuHY2rwd-6SSB4axr2Dd79vm95sAEkcmI3U7ME,12977
 nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py,sha256=lFhppNqrq5X_fzbCWKphvZQMzaJd3gHrkWsyJORzFrU,5010
 nv_ingest_api/internal/primitives/nim/model_interface/vlm.py,sha256=qJ382PU1ZrIM-SR3cqIhtY_W2rmHec2HIa2aUB2SvaU,6031
-nv_ingest_api/internal/primitives/nim/model_interface/yolox.py,sha256=
+nv_ingest_api/internal/primitives/nim/model_interface/yolox.py,sha256=nsfDQgeupBe9Tdf3S5sfNpYcObEwVlzCZdfg1ObAW88,49584
 nv_ingest_api/internal/primitives/tracing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nv_ingest_api/internal/primitives/tracing/latency.py,sha256=5kVTeYRbRdTlT_aI4MeS20N_S7mqCcLqZR6YHtxhXkY,2215
 nv_ingest_api/internal/primitives/tracing/logging.py,sha256=SSzIgS7afLH-e1C7VagYDmkkA6rTXmQ-bmtLjoEguhg,3851
@@ -82,7 +82,7 @@ nv_ingest_api/internal/schemas/message_brokers/request_schema.py,sha256=LZX_wXDx
 nv_ingest_api/internal/schemas/message_brokers/response_schema.py,sha256=4b275HlzBSzpmuE2wdoeaGKPCdKki3wuWldtRIfrj8w,727
 nv_ingest_api/internal/schemas/meta/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest_api/internal/schemas/meta/base_model_noext.py,sha256=8hXU1uuiqZ6t8EsoZ8vlC5EFf2zSZrKEX133FcfZMwI,316
-nv_ingest_api/internal/schemas/meta/ingest_job_schema.py,sha256=
+nv_ingest_api/internal/schemas/meta/ingest_job_schema.py,sha256=ceYQjRjhBSDbbZ6q-Db7Y6GHVOvWPdGAMb3TX1vMWfY,8321
 nv_ingest_api/internal/schemas/meta/metadata_schema.py,sha256=VnAzkSFat_ckI19mlwQTlFrvP6EZVCwyNl9bt51b8oU,7193
 nv_ingest_api/internal/schemas/mutate/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py,sha256=k1JOdlPPpsipc0XhHf-9YxJ_-W0HvpVE1ZhYmr7fzj0,395
@@ -92,14 +92,14 @@ nv_ingest_api/internal/schemas/store/store_image_schema.py,sha256=p2LGij9i6sG6RY
 nv_ingest_api/internal/schemas/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py,sha256=OtM1iPw26uioC3mghbOJQurKGg641uQfhASH462VqOY,578
 nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py,sha256=31ThI5fr0yyENeJeE1xMAA-pxk1QVJLwM842zMate_k,429
-nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py,sha256=
+nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py,sha256=RZCISA8CUqKiY8eJuk4uWxzo4PZ-fuYdzMO7_LYFkoM,1117
 nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py,sha256=D9K8tvu-tkEBQkZo7uuRzgrHdGyM3ZcNycHbHy5HV2E,791
 nv_ingest_api/internal/store/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest_api/internal/store/embed_text_upload.py,sha256=maxb4FPsBvWgvlrjAPEBlRZEFdJX5NxPG-p8kUbzV7I,9898
 nv_ingest_api/internal/store/image_upload.py,sha256=GNlY4k3pfcHv3lzXxkbmGLeHFsf9PI25bkBn6Xn9h3I,9654
 nv_ingest_api/internal/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest_api/internal/transform/caption_image.py,sha256=0ILCG2F8ESqKtZiPUM-6F1BHUflFZ76Dzi2GNzkE-lU,8517
-nv_ingest_api/internal/transform/embed_text.py,sha256=
+nv_ingest_api/internal/transform/embed_text.py,sha256=kvVGlNH1S91UENXWLD31uh3KzlfJYOlYitpIFMsyowU,20033
 nv_ingest_api/internal/transform/split_text.py,sha256=-kwpRWSVZrPldm1hn3-tVz_TkzuKM-kPvNU3HTp9zOY,7476
 nv_ingest_api/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
105
105
|
nv_ingest_api/util/control_message/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -123,7 +123,7 @@ nv_ingest_api/util/image_processing/__init__.py,sha256=Jiy8C1ZuSrNb_eBM1ZTV9IKFI
|
|
|
123
123
|
nv_ingest_api/util/image_processing/clustering.py,sha256=sUGlZI4cx1q8h4Pns1N9JVpdfSM2BOH8zRmn9QFCtzI,9236
|
|
124
124
|
nv_ingest_api/util/image_processing/processing.py,sha256=LSoDDEmahr7a-qSS12McVcowRe3dOrAZwa1h-PD_JPQ,6554
|
|
125
125
|
nv_ingest_api/util/image_processing/table_and_chart.py,sha256=bxOu9PZYkG_WFCDGw_JLaO60S2pDSN8EOWK3xkIwr2A,14376
|
|
126
|
-
nv_ingest_api/util/image_processing/transforms.py,sha256=
|
|
126
|
+
nv_ingest_api/util/image_processing/transforms.py,sha256=3-xeUerc2AaXJTYuR23EjwdtjRQ8F85pS5D9zxR4cLA,23452
|
|
127
127
|
nv_ingest_api/util/imports/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
128
128
|
nv_ingest_api/util/imports/callable_signatures.py,sha256=e2bJB1pmkN4Ee-Bf-VggOSBaQ4RXofWF5eKkWXgIj2U,1855
|
|
129
129
|
nv_ingest_api/util/imports/dynamic_resolvers.py,sha256=7GByV_-8z2X0tnVoabCxVioxOP3sYMros3ZllVAW-wY,4343
|
|
@@ -135,12 +135,12 @@ nv_ingest_api/util/message_brokers/simple_message_broker/broker.py,sha256=h9Q4q_
|
|
|
135
135
|
nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py,sha256=3p-LRqG8qLnsfEhBNf73_DG22C08JKahTqUvPLS2Apg,2554
|
|
136
136
|
nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py,sha256=fh7Q0wO5H_FtrHV1VdT6V66aZNqglOh_2XdkfLt8hgg,15722
|
|
137
137
|
nv_ingest_api/util/metadata/__init__.py,sha256=HIHfzSig66GT0Uk8qsGBm_f13fKYcPtItBicRUWOOVA,183
|
|
138
|
-
nv_ingest_api/util/metadata/aggregators.py,sha256=
|
|
138
|
+
nv_ingest_api/util/metadata/aggregators.py,sha256=YYdvJ1E04eGFZKKHUxXoH6mzLg8nor9Smvnv0qzqK5w,15988
|
|
139
139
|
nv_ingest_api/util/multi_processing/__init__.py,sha256=4fojP8Rp_5Hu1YAkqGylqTyEZ-HBVVEunn5Z9I99swA,242
|
|
140
140
|
nv_ingest_api/util/multi_processing/mp_pool_singleton.py,sha256=dTfP82DgGPaXEJH3jywTO8rNlLZUniD4FFzwv84_giE,7372
|
|
141
141
|
nv_ingest_api/util/nim/__init__.py,sha256=UqbiXFCqjWcjNvoduXd_0gOUOGBT8JvppiYHOmMyneA,1775
|
|
142
142
|
nv_ingest_api/util/pdf/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
|
|
143
|
-
nv_ingest_api/util/pdf/pdfium.py,sha256=
|
|
143
|
+
nv_ingest_api/util/pdf/pdfium.py,sha256=qTiTlSaiCk_rxm_eoQBoAFKq_5OQrioHVSbPbGDxVkE,15668
|
|
144
144
|
nv_ingest_api/util/schema/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
145
145
|
nv_ingest_api/util/schema/schema_validator.py,sha256=H0yZ_i_HZaiBRUCGmTBfRB9-hURhVqyd10aS_ynM1_0,321
|
|
146
146
|
nv_ingest_api/util/service_clients/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
@@ -153,8 +153,8 @@ nv_ingest_api/util/service_clients/rest/rest_client.py,sha256=dZ-jrk7IK7oNtHoXFS
|
|
|
153
153
|
nv_ingest_api/util/string_processing/__init__.py,sha256=mkwHthyS-IILcLcL1tJYeF6mpqX3pxEw5aUzDGjTSeU,1411
|
|
154
154
|
nv_ingest_api/util/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
155
155
|
nv_ingest_api/util/system/hardware_info.py,sha256=ORZeKpH9kSGU_vuPhyBwkIiMyCViKUX2CP__MCjrfbU,19463
|
|
156
|
-
nv_ingest_api-2025.7.
|
|
157
|
-
nv_ingest_api-2025.7.
|
|
158
|
-
nv_ingest_api-2025.7.
|
|
159
|
-
nv_ingest_api-2025.7.
|
|
160
|
-
nv_ingest_api-2025.7.
|
|
156
|
+
nv_ingest_api-2025.7.16.dev20250716.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
157
|
+
nv_ingest_api-2025.7.16.dev20250716.dist-info/METADATA,sha256=RaPAkQ4Dtkkrn6hi9Va1t2XDpDgRbe-bFqmCVL3IlEA,13947
|
|
158
|
+
nv_ingest_api-2025.7.16.dev20250716.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
159
|
+
nv_ingest_api-2025.7.16.dev20250716.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
|
|
160
|
+
nv_ingest_api-2025.7.16.dev20250716.dist-info/RECORD,,
|
File without changes
File without changes
File without changes
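For readers who want to check a row of this RECORD diff against a local copy of the wheel: each entry has the form `path,sha256=<digest>,size-in-bytes`, where the digest is the urlsafe-base64-encoded SHA-256 of the file with the trailing `=` padding removed (the standard wheel RECORD convention). A minimal verification sketch, assuming an unpacked copy of the wheel in the current directory; the specific path below is just an illustration:

```python
import base64
import hashlib
from pathlib import Path


def record_digest(path: Path) -> str:
    """RECORD-style digest: urlsafe base64 of the file's SHA-256, '=' padding stripped."""
    digest = hashlib.sha256(path.read_bytes()).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")


# Hypothetical local file; compare the printed row against the corresponding RECORD entry,
# e.g. nv_ingest_api/util/pdf/pdfium.py,sha256=qTiTlSaiCk_rxm_eoQBoAFKq_5OQrioHVSbPbGDxVkE,15668
target = Path("nv_ingest_api/util/pdf/pdfium.py")
if target.exists():
    print(f"{target},sha256={record_digest(target)},{target.stat().st_size}")
```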