nv-ingest-api 2025.8.19.dev20250819__py3-none-any.whl → 2025.8.20.dev20250820__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. See the release details on the package registry for more information.
- nv_ingest_api/internal/extract/pdf/engines/llama.py +4 -1
- nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +20 -2
- nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +15 -2
- nv_ingest_api/internal/transform/embed_text.py +4 -1
- {nv_ingest_api-2025.8.19.dev20250819.dist-info → nv_ingest_api-2025.8.20.dev20250820.dist-info}/METADATA +1 -1
- {nv_ingest_api-2025.8.19.dev20250819.dist-info → nv_ingest_api-2025.8.20.dev20250820.dist-info}/RECORD +9 -9
- {nv_ingest_api-2025.8.19.dev20250819.dist-info → nv_ingest_api-2025.8.20.dev20250820.dist-info}/WHEEL +0 -0
- {nv_ingest_api-2025.8.19.dev20250819.dist-info → nv_ingest_api-2025.8.20.dev20250820.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-2025.8.19.dev20250819.dist-info → nv_ingest_api-2025.8.20.dev20250820.dist-info}/top_level.txt +0 -0
|
@@ -193,7 +193,10 @@ async def async_llama_parse(
|
|
|
193
193
|
A string of extracted text.
|
|
194
194
|
"""
|
|
195
195
|
base_url = "https://api.cloud.llamaindex.ai/api/parsing"
|
|
196
|
-
|
|
196
|
+
# Normalize in case api_key contains only whitespace; avoid sending an empty bearer token
|
|
197
|
+
_token = (api_key or "").strip()
|
|
198
|
+
_auth_value = f"Bearer {_token}" if _token else "Bearer <no key provided>"
|
|
199
|
+
headers = {"Authorization": _auth_value}
|
|
197
200
|
mime_type = "application/pdf"
|
|
198
201
|
|
|
199
202
|
try:
|
|
@@ -3,13 +3,31 @@
|
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
from pydantic import ConfigDict, BaseModel
|
|
6
|
+
from pydantic import ConfigDict, BaseModel, model_validator, field_validator
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class ImageCaptionExtractionSchema(BaseModel):
|
|
10
|
-
api_key: str = "
|
|
10
|
+
api_key: str = ""
|
|
11
11
|
endpoint_url: str = "https://integrate.api.nvidia.com/v1/chat/completions"
|
|
12
12
|
prompt: str = "Caption the content of this image:"
|
|
13
13
|
model_name: str = "nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
|
|
14
14
|
raise_on_failure: bool = False
|
|
15
15
|
model_config = ConfigDict(extra="forbid")
|
|
16
|
+
|
|
17
|
+
@field_validator("api_key", mode="before")
|
|
18
|
+
@classmethod
|
|
19
|
+
def _coerce_api_key_none(cls, v):
|
|
20
|
+
return "" if v is None else v
|
|
21
|
+
|
|
22
|
+
@model_validator(mode="before")
|
|
23
|
+
@classmethod
|
|
24
|
+
def _coerce_none_to_empty(cls, values):
|
|
25
|
+
"""Allow None for string fields where empty string is acceptable.
|
|
26
|
+
|
|
27
|
+
Specifically, convert api_key=None to api_key="" so validation passes
|
|
28
|
+
when no API key is supplied.
|
|
29
|
+
"""
|
|
30
|
+
if isinstance(values, dict):
|
|
31
|
+
if values.get("api_key") is None:
|
|
32
|
+
values["api_key"] = ""
|
|
33
|
+
return values
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
7
|
|
|
8
|
-
from pydantic import ConfigDict, BaseModel, Field
|
|
8
|
+
from pydantic import ConfigDict, BaseModel, Field, model_validator, field_validator
|
|
9
9
|
|
|
10
10
|
from nv_ingest_api.util.logging.configuration import LogLevel
|
|
11
11
|
|
|
@@ -13,7 +13,7 @@ logger = logging.getLogger(__name__)
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class TextEmbeddingSchema(BaseModel):
|
|
16
|
-
api_key: str = Field(default="
|
|
16
|
+
api_key: str = Field(default="")
|
|
17
17
|
batch_size: int = Field(default=4)
|
|
18
18
|
embedding_model: str = Field(default="nvidia/llama-3.2-nv-embedqa-1b-v2")
|
|
19
19
|
embedding_nim_endpoint: str = Field(default="http://embedding:8000/v1")
|
|
@@ -28,3 +28,16 @@ class TextEmbeddingSchema(BaseModel):
|
|
|
28
28
|
audio_elements_modality: str = Field(default="text")
|
|
29
29
|
|
|
30
30
|
model_config = ConfigDict(extra="forbid")
|
|
31
|
+
|
|
32
|
+
@field_validator("api_key", mode="before")
|
|
33
|
+
@classmethod
|
|
34
|
+
def _coerce_api_key_none(cls, v):
|
|
35
|
+
return "" if v is None else v
|
|
36
|
+
|
|
37
|
+
@model_validator(mode="before")
|
|
38
|
+
@classmethod
|
|
39
|
+
def _coerce_none_to_empty(cls, values):
|
|
40
|
+
"""Convert api_key=None to empty string so validation passes when key is omitted."""
|
|
41
|
+
if isinstance(values, dict) and values.get("api_key") is None:
|
|
42
|
+
values["api_key"] = ""
|
|
43
|
+
return values
|
|
@@ -75,8 +75,11 @@ def _make_async_request(
|
|
|
75
75
|
response = {}
|
|
76
76
|
|
|
77
77
|
try:
|
|
78
|
+
# Normalize API key to avoid sending an empty bearer token via SDK internals
|
|
79
|
+
_token = (api_key or "").strip()
|
|
80
|
+
_api_key = _token if _token else "<no key provided>"
|
|
78
81
|
client = OpenAI(
|
|
79
|
-
api_key=
|
|
82
|
+
api_key=_api_key,
|
|
80
83
|
base_url=embedding_nim_endpoint,
|
|
81
84
|
)
|
|
82
85
|
|
|
@@ -30,7 +30,7 @@ nv_ingest_api/internal/extract/pdf/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusX
|
|
|
30
30
|
nv_ingest_api/internal/extract/pdf/pdf_extractor.py,sha256=CxtWaD6mql9MEqSdk2CfSQ9T-Bn87beBkCOuGGjxGt8,2934
|
|
31
31
|
nv_ingest_api/internal/extract/pdf/engines/__init__.py,sha256=u4GnAZmDKRl0RwYGIRiozIRw70Kybw3A72-lcKFeoTI,582
|
|
32
32
|
nv_ingest_api/internal/extract/pdf/engines/adobe.py,sha256=VT0dEqkU-y2uGkaCqxtKYov_Q8R1028UQVBchgMLca4,17466
|
|
33
|
-
nv_ingest_api/internal/extract/pdf/engines/llama.py,sha256=
|
|
33
|
+
nv_ingest_api/internal/extract/pdf/engines/llama.py,sha256=MwzM-n2tu0FHM0wDe_0mONLlzHrPte7EOTuPtzCh7Zs,8384
|
|
34
34
|
nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py,sha256=IVbNcH_phMiRSxnkZ04pGfQrPJ-x1zVR3hXyhxv7juc,22977
|
|
35
35
|
nv_ingest_api/internal/extract/pdf/engines/pdfium.py,sha256=SKmias2iZmAE6Q8WXxmFEjvLOZy-vXRoaRIPpi7Tuhs,22962
|
|
36
36
|
nv_ingest_api/internal/extract/pdf/engines/tika.py,sha256=6GyR2l6EsgNZl9jnYDXLeKNK9Fj2Mw9y2UWDq-eSkOc,3169
|
|
@@ -93,16 +93,16 @@ nv_ingest_api/internal/schemas/store/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQu
|
|
|
93
93
|
nv_ingest_api/internal/schemas/store/store_embedding_schema.py,sha256=tdKeiraim9CDL9htgp4oUSCoPMoO5PrHBnlXqDyCpMw,956
|
|
94
94
|
nv_ingest_api/internal/schemas/store/store_image_schema.py,sha256=p2LGij9i6sG6RYmsfdiQOiWIc2j-POjxYrNuMrp3ELU,1010
|
|
95
95
|
nv_ingest_api/internal/schemas/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
96
|
-
nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py,sha256=
|
|
96
|
+
nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py,sha256=O7rOJI9MunXrQVbONtc8mXzZqVLAx5BjqjfhqQOWlQY,1154
|
|
97
97
|
nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py,sha256=31ThI5fr0yyENeJeE1xMAA-pxk1QVJLwM842zMate_k,429
|
|
98
|
-
nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py,sha256=
|
|
98
|
+
nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py,sha256=D6ZFY6m37HqZ9NBGaeqI9VD0z1sWI55V8KTflr8MZ5o,1604
|
|
99
99
|
nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py,sha256=D9K8tvu-tkEBQkZo7uuRzgrHdGyM3ZcNycHbHy5HV2E,791
|
|
100
100
|
nv_ingest_api/internal/store/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
101
101
|
nv_ingest_api/internal/store/embed_text_upload.py,sha256=maxb4FPsBvWgvlrjAPEBlRZEFdJX5NxPG-p8kUbzV7I,9898
|
|
102
102
|
nv_ingest_api/internal/store/image_upload.py,sha256=GNlY4k3pfcHv3lzXxkbmGLeHFsf9PI25bkBn6Xn9h3I,9654
|
|
103
103
|
nv_ingest_api/internal/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
104
104
|
nv_ingest_api/internal/transform/caption_image.py,sha256=0ILCG2F8ESqKtZiPUM-6F1BHUflFZ76Dzi2GNzkE-lU,8517
|
|
105
|
-
nv_ingest_api/internal/transform/embed_text.py,sha256=
|
|
105
|
+
nv_ingest_api/internal/transform/embed_text.py,sha256=LB_2Zvw6plc7uOWT2QN13aDu2qFumXzl_RB3ZcZbLGs,20191
|
|
106
106
|
nv_ingest_api/internal/transform/split_text.py,sha256=LAtInGVuydH43UwjNMQWFVC1A6NdhXP_dZup2xX4qEo,7745
|
|
107
107
|
nv_ingest_api/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
108
108
|
nv_ingest_api/util/control_message/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -161,8 +161,8 @@ nv_ingest_api/util/string_processing/configuration.py,sha256=2HS08msccuPCT0fn_jf
|
|
|
161
161
|
nv_ingest_api/util/string_processing/yaml.py,sha256=6SW2O6wbXRhGbhETMbtXjYCZn53HeCNOP6a96AaxlHs,1454
|
|
162
162
|
nv_ingest_api/util/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
163
163
|
nv_ingest_api/util/system/hardware_info.py,sha256=1UFM8XE6M3pgQcpbVsCsqDQ7Dj-zzptL-XRE-DEu9UA,27213
|
|
164
|
-
nv_ingest_api-2025.8.
|
|
165
|
-
nv_ingest_api-2025.8.
|
|
166
|
-
nv_ingest_api-2025.8.
|
|
167
|
-
nv_ingest_api-2025.8.
|
|
168
|
-
nv_ingest_api-2025.8.
|
|
164
|
+
nv_ingest_api-2025.8.20.dev20250820.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
165
|
+
nv_ingest_api-2025.8.20.dev20250820.dist-info/METADATA,sha256=wtjzUuf6l64PjK1i3_XbMB3RsOUhWReCx0hOStoxLT8,13947
|
|
166
|
+
nv_ingest_api-2025.8.20.dev20250820.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
167
|
+
nv_ingest_api-2025.8.20.dev20250820.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
|
|
168
|
+
nv_ingest_api-2025.8.20.dev20250820.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|