nv-ingest-api 2025.8.20.dev20250820__py3-none-any.whl → 2025.8.21.dev20250821__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. Click here for more details.
- nv_ingest_api/interface/__init__.py +14 -11
- nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +3 -2
- nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +3 -3
- nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +2 -2
- nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +2 -2
- nv_ingest_api/internal/schemas/extract/extract_image_schema.py +2 -2
- nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +2 -2
- nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +3 -3
- nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +2 -2
- nv_ingest_api/internal/schemas/extract/extract_table_schema.py +2 -2
- nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +4 -4
- nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +2 -2
- nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +1 -1
- nv_ingest_api/util/logging/sanitize.py +84 -0
- {nv_ingest_api-2025.8.20.dev20250820.dist-info → nv_ingest_api-2025.8.21.dev20250821.dist-info}/METADATA +1 -1
- {nv_ingest_api-2025.8.20.dev20250820.dist-info → nv_ingest_api-2025.8.21.dev20250821.dist-info}/RECORD +19 -18
- {nv_ingest_api-2025.8.20.dev20250820.dist-info → nv_ingest_api-2025.8.21.dev20250821.dist-info}/WHEEL +0 -0
- {nv_ingest_api-2025.8.20.dev20250820.dist-info → nv_ingest_api-2025.8.21.dev20250821.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-2025.8.20.dev20250820.dist-info → nv_ingest_api-2025.8.21.dev20250821.dist-info}/top_level.txt +0 -0
|
@@ -11,6 +11,7 @@ from typing import Dict, Any, Optional, List
|
|
|
11
11
|
from pydantic import BaseModel
|
|
12
12
|
|
|
13
13
|
from nv_ingest_api.internal.schemas.extract.extract_pdf_schema import PDFiumConfigSchema, NemoRetrieverParseConfigSchema
|
|
14
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
14
15
|
|
|
15
16
|
logger = logging.getLogger(__name__)
|
|
16
17
|
|
|
@@ -180,29 +181,31 @@ def extraction_interface_relay_constructor(api_fn, task_keys: Optional[List[str]
|
|
|
180
181
|
if extractor_schema is None:
|
|
181
182
|
extractor_schema = {f"{extract_method}_config": extraction_config_dict}
|
|
182
183
|
|
|
183
|
-
# Log the task and extractor configurations for debugging
|
|
184
|
+
# Log the task and extractor configurations for debugging (sanitized)
|
|
184
185
|
logger.debug("\n" + "=" * 80)
|
|
185
186
|
logger.debug(f"DEBUG - API Function: {api_fn.__name__}")
|
|
186
187
|
logger.debug(f"DEBUG - Extract Method: {extract_method}")
|
|
187
188
|
logger.debug("-" * 80)
|
|
188
189
|
|
|
189
|
-
#
|
|
190
|
-
|
|
191
|
-
|
|
190
|
+
# Sanitize and format the task config as a string and log it
|
|
191
|
+
sanitized_task_config = sanitize_for_logging(task_config)
|
|
192
|
+
task_config_str = pprint.pformat(sanitized_task_config, width=100, sort_dicts=False)
|
|
193
|
+
logger.debug(f"DEBUG - Task Config (sanitized):\n{task_config_str}")
|
|
192
194
|
logger.debug("-" * 80)
|
|
193
195
|
|
|
194
|
-
#
|
|
196
|
+
# Sanitize and format the extractor config as a string and log it
|
|
195
197
|
if hasattr(extractor_schema, "model_dump"):
|
|
196
|
-
|
|
198
|
+
sanitized_extractor_config = sanitize_for_logging(extractor_schema.model_dump())
|
|
197
199
|
else:
|
|
198
|
-
|
|
200
|
+
sanitized_extractor_config = sanitize_for_logging(extractor_schema)
|
|
201
|
+
extractor_config_str = pprint.pformat(sanitized_extractor_config, width=100, sort_dicts=False)
|
|
199
202
|
logger.debug(f"DEBUG - Extractor Config Type: {type(extractor_schema)}")
|
|
200
|
-
logger.debug(f"DEBUG - Extractor Config:\n{extractor_config_str}")
|
|
203
|
+
logger.debug(f"DEBUG - Extractor Config (sanitized):\n{extractor_config_str}")
|
|
201
204
|
logger.debug("=" * 80 + "\n")
|
|
202
205
|
|
|
203
|
-
# Call the backend API function.
|
|
204
|
-
pprint.pprint(
|
|
205
|
-
pprint.pprint(
|
|
206
|
+
# Call the backend API function. Print sanitized configs for any debug consumers of stdout.
|
|
207
|
+
pprint.pprint(sanitized_task_config)
|
|
208
|
+
pprint.pprint(sanitized_extractor_config)
|
|
206
209
|
result = api_fn(ledger, task_config, extractor_schema, execution_trace_log)
|
|
207
210
|
|
|
208
211
|
# If the result is a tuple, return only the first element
|
|
@@ -11,6 +11,7 @@ from typing import Any
|
|
|
11
11
|
from typing import Dict
|
|
12
12
|
from typing import List
|
|
13
13
|
from typing import Optional
|
|
14
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
14
15
|
|
|
15
16
|
import pandas as pd
|
|
16
17
|
from nv_ingest_api.internal.extract.pdf.engines import adobe_extractor
|
|
@@ -131,7 +132,7 @@ def _orchestrate_row_extraction(
|
|
|
131
132
|
method_config = extractor_config[config_key]
|
|
132
133
|
else:
|
|
133
134
|
# If no matching config is found, log a warning but don't fail
|
|
134
|
-
logger.warning(f"No {config_key} found in extractor_config: {extractor_config}")
|
|
135
|
+
logger.warning(f"No {config_key} found in extractor_config: {sanitize_for_logging(extractor_config)}")
|
|
135
136
|
method_config = None
|
|
136
137
|
|
|
137
138
|
# Add the method-specific config to the parameters if available
|
|
@@ -141,7 +142,7 @@ def _orchestrate_row_extraction(
|
|
|
141
142
|
|
|
142
143
|
# The resulting parameters constitute the complete extractor_config
|
|
143
144
|
extractor_config = params
|
|
144
|
-
logger.debug(f"Final extractor_config: {extractor_config}")
|
|
145
|
+
logger.debug(f"Final extractor_config: {sanitize_for_logging(extractor_config)}")
|
|
145
146
|
|
|
146
147
|
result = _work_extract_pdf(
|
|
147
148
|
pdf_stream=pdf_stream,
|
|
@@ -7,7 +7,7 @@ import logging
|
|
|
7
7
|
from typing import Optional
|
|
8
8
|
from typing import Tuple
|
|
9
9
|
|
|
10
|
-
from pydantic import BaseModel
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
11
|
from pydantic import root_validator
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
@@ -42,12 +42,12 @@ class AudioConfigSchema(BaseModel):
|
|
|
42
42
|
Pydantic config option to forbid extra fields.
|
|
43
43
|
"""
|
|
44
44
|
|
|
45
|
-
auth_token: Optional[str] = None
|
|
45
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
46
46
|
audio_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
47
47
|
audio_infer_protocol: Optional[str] = None
|
|
48
48
|
function_id: Optional[str] = None
|
|
49
49
|
use_ssl: Optional[bool] = None
|
|
50
|
-
ssl_cert: Optional[str] = None
|
|
50
|
+
ssl_cert: Optional[str] = Field(default=None, repr=False)
|
|
51
51
|
segment_audio: Optional[bool] = None
|
|
52
52
|
|
|
53
53
|
@root_validator(pre=True)
|
|
@@ -6,7 +6,7 @@ import logging
|
|
|
6
6
|
from typing import Optional
|
|
7
7
|
from typing import Tuple
|
|
8
8
|
|
|
9
|
-
from pydantic import field_validator, model_validator, ConfigDict, BaseModel
|
|
9
|
+
from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
|
|
10
10
|
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
12
12
|
|
|
@@ -44,7 +44,7 @@ class ChartExtractorConfigSchema(BaseModel):
|
|
|
44
44
|
Pydantic config option to forbid extra fields.
|
|
45
45
|
"""
|
|
46
46
|
|
|
47
|
-
auth_token: Optional[str] = None
|
|
47
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
48
48
|
|
|
49
49
|
yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
50
50
|
yolox_infer_protocol: str = ""
|
|
@@ -7,7 +7,7 @@ import logging
|
|
|
7
7
|
from typing import Optional
|
|
8
8
|
from typing import Tuple
|
|
9
9
|
|
|
10
|
-
from pydantic import model_validator, ConfigDict, BaseModel
|
|
10
|
+
from pydantic import model_validator, ConfigDict, BaseModel, Field
|
|
11
11
|
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
13
13
|
|
|
@@ -41,7 +41,7 @@ class DocxConfigSchema(BaseModel):
|
|
|
41
41
|
Pydantic config option to forbid extra fields.
|
|
42
42
|
"""
|
|
43
43
|
|
|
44
|
-
auth_token: Optional[str] = None
|
|
44
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
45
45
|
|
|
46
46
|
yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
47
47
|
yolox_infer_protocol: str = ""
|
|
@@ -7,7 +7,7 @@ import logging
|
|
|
7
7
|
from typing import Optional
|
|
8
8
|
from typing import Tuple
|
|
9
9
|
|
|
10
|
-
from pydantic import model_validator, ConfigDict, BaseModel
|
|
10
|
+
from pydantic import model_validator, ConfigDict, BaseModel, Field
|
|
11
11
|
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
13
13
|
|
|
@@ -41,7 +41,7 @@ class ImageConfigSchema(BaseModel):
|
|
|
41
41
|
Pydantic config option to forbid extra fields.
|
|
42
42
|
"""
|
|
43
43
|
|
|
44
|
-
auth_token: Optional[str] = None
|
|
44
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
45
45
|
|
|
46
46
|
yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
47
47
|
yolox_infer_protocol: str = ""
|
|
@@ -6,7 +6,7 @@ import logging
|
|
|
6
6
|
from typing import Optional
|
|
7
7
|
from typing import Tuple
|
|
8
8
|
|
|
9
|
-
from pydantic import field_validator, model_validator, ConfigDict, BaseModel
|
|
9
|
+
from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
|
|
10
10
|
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
12
12
|
|
|
@@ -40,7 +40,7 @@ class InfographicExtractorConfigSchema(BaseModel):
|
|
|
40
40
|
Pydantic config option to forbid extra fields.
|
|
41
41
|
"""
|
|
42
42
|
|
|
43
|
-
auth_token: Optional[str] = None
|
|
43
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
44
44
|
|
|
45
45
|
ocr_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
46
46
|
ocr_infer_protocol: str = ""
|
|
@@ -7,7 +7,7 @@ import logging
|
|
|
7
7
|
from typing import Optional
|
|
8
8
|
from typing import Tuple
|
|
9
9
|
|
|
10
|
-
from pydantic import model_validator, ConfigDict, BaseModel
|
|
10
|
+
from pydantic import model_validator, ConfigDict, BaseModel, Field
|
|
11
11
|
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
13
13
|
|
|
@@ -41,7 +41,7 @@ class PDFiumConfigSchema(BaseModel):
|
|
|
41
41
|
Pydantic config option to forbid extra fields.
|
|
42
42
|
"""
|
|
43
43
|
|
|
44
|
-
auth_token: Optional[str] = None
|
|
44
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
45
45
|
|
|
46
46
|
yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
47
47
|
yolox_infer_protocol: str = ""
|
|
@@ -123,7 +123,7 @@ class NemoRetrieverParseConfigSchema(BaseModel):
|
|
|
123
123
|
Pydantic config option to forbid extra fields.
|
|
124
124
|
"""
|
|
125
125
|
|
|
126
|
-
auth_token: Optional[str] = None
|
|
126
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
127
127
|
|
|
128
128
|
yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
129
129
|
yolox_infer_protocol: str = ""
|
|
@@ -7,7 +7,7 @@ import logging
|
|
|
7
7
|
from typing import Optional
|
|
8
8
|
from typing import Tuple
|
|
9
9
|
|
|
10
|
-
from pydantic import model_validator, ConfigDict, BaseModel
|
|
10
|
+
from pydantic import model_validator, ConfigDict, BaseModel, Field
|
|
11
11
|
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
13
13
|
|
|
@@ -41,7 +41,7 @@ class PPTXConfigSchema(BaseModel):
|
|
|
41
41
|
Pydantic config option to forbid extra fields.
|
|
42
42
|
"""
|
|
43
43
|
|
|
44
|
-
auth_token: Optional[str] = None
|
|
44
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
45
45
|
|
|
46
46
|
yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
47
47
|
yolox_infer_protocol: str = ""
|
|
@@ -7,7 +7,7 @@ import logging
|
|
|
7
7
|
from typing import Optional
|
|
8
8
|
from typing import Tuple
|
|
9
9
|
|
|
10
|
-
from pydantic import field_validator, model_validator, ConfigDict, BaseModel
|
|
10
|
+
from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
@@ -42,7 +42,7 @@ class TableExtractorConfigSchema(BaseModel):
|
|
|
42
42
|
Pydantic config option to forbid extra fields.
|
|
43
43
|
"""
|
|
44
44
|
|
|
45
|
-
auth_token: Optional[str] = None
|
|
45
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
46
46
|
|
|
47
47
|
yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
48
48
|
yolox_infer_protocol: str = ""
|
|
@@ -73,7 +73,7 @@ class IngestTaskStoreSchema(BaseModelNoExt):
|
|
|
73
73
|
|
|
74
74
|
# Captioning: All fields are optional and override default parameters.
|
|
75
75
|
class IngestTaskCaptionSchema(BaseModelNoExt):
|
|
76
|
-
api_key: Optional[str] = None
|
|
76
|
+
api_key: Optional[str] = Field(default=None, repr=False)
|
|
77
77
|
endpoint_url: Optional[str] = None
|
|
78
78
|
prompt: Optional[str] = None
|
|
79
79
|
model_name: Optional[str] = None
|
|
@@ -105,7 +105,7 @@ class IngestTaskDedupSchema(BaseModelNoExt):
|
|
|
105
105
|
class IngestTaskEmbedSchema(BaseModelNoExt):
|
|
106
106
|
endpoint_url: Optional[str] = None
|
|
107
107
|
model_name: Optional[str] = None
|
|
108
|
-
api_key: Optional[str] = None
|
|
108
|
+
api_key: Optional[str] = Field(default=None, repr=False)
|
|
109
109
|
filter_errors: bool = False
|
|
110
110
|
text_elements_modality: Optional[str] = None
|
|
111
111
|
image_elements_modality: Optional[str] = None
|
|
@@ -121,13 +121,13 @@ class IngestTaskVdbUploadSchema(BaseModelNoExt):
|
|
|
121
121
|
|
|
122
122
|
|
|
123
123
|
class IngestTaskAudioExtraction(BaseModelNoExt):
|
|
124
|
-
auth_token: Optional[str] = None
|
|
124
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
125
125
|
grpc_endpoint: Optional[str] = None
|
|
126
126
|
http_endpoint: Optional[str] = None
|
|
127
127
|
infer_protocol: Optional[str] = None
|
|
128
128
|
function_id: Optional[str] = None
|
|
129
129
|
use_ssl: Optional[bool] = None
|
|
130
|
-
ssl_cert: Optional[str] = None
|
|
130
|
+
ssl_cert: Optional[str] = Field(default=None, repr=False)
|
|
131
131
|
segment_audio: Optional[bool] = None
|
|
132
132
|
|
|
133
133
|
|
|
@@ -3,11 +3,11 @@
|
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
from pydantic import ConfigDict, BaseModel, model_validator, field_validator
|
|
6
|
+
from pydantic import ConfigDict, BaseModel, model_validator, field_validator, Field
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class ImageCaptionExtractionSchema(BaseModel):
|
|
10
|
-
api_key: str = ""
|
|
10
|
+
api_key: str = Field(default="", repr=False)
|
|
11
11
|
endpoint_url: str = "https://integrate.api.nvidia.com/v1/chat/completions"
|
|
12
12
|
prompt: str = "Caption the content of this image:"
|
|
13
13
|
model_name: str = "nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
|
|
@@ -13,7 +13,7 @@ logger = logging.getLogger(__name__)
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class TextEmbeddingSchema(BaseModel):
|
|
16
|
-
api_key: str = Field(default="")
|
|
16
|
+
api_key: str = Field(default="", repr=False)
|
|
17
17
|
batch_size: int = Field(default=4)
|
|
18
18
|
embedding_model: str = Field(default="nvidia/llama-3.2-nv-embedqa-1b-v2")
|
|
19
19
|
embedding_nim_endpoint: str = Field(default="http://embedding:8000/v1")
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any, Mapping, MutableMapping, Sequence, Set
|
|
8
|
+
|
|
9
|
+
try:
    # Pydantic is imported eagerly here; the guard only keeps this helper
    # importable in an environment where pydantic is not installed.
    from pydantic import BaseModel  # type: ignore
except ImportError:  # pragma: no cover - pydantic always present in this repo
    BaseModel = None  # type: ignore


# Keys whose values are replaced by the redaction marker. Compared
# case-insensitively against the exact key name (no substring matching).
_DEFAULT_SENSITIVE_KEYS: Set[str] = {
    "access_token",
    "api_key",
    "authorization",
    "auth_token",
    "client_secret",
    "hf_access_token",
    "hugging_face_access_token",
    "password",
    "refresh_token",
    "secret",
    "ssl_cert",
    "x-api-key",
}

_REDACTION = "***REDACTED***"


def _is_mapping(obj: Any) -> bool:
    """Return True when *obj* is a ``Mapping``; never raises."""
    try:
        return isinstance(obj, Mapping)
    except Exception:
        # isinstance() can only fail for objects with a misbehaving __class__;
        # such objects are simply treated as non-mappings.
        return False


def _is_sequence(obj: Any) -> bool:
    """Return True for sequences that should be traversed element by element."""
    # Exclude strings/bytes from sequences we want to traverse
    return isinstance(obj, Sequence) and not isinstance(obj, (str, bytes, bytearray))


def _rebuild_mapping(template: Any, items: list) -> Any:
    """Build a mapping of ``type(template)`` from *items*, or a plain dict if that fails."""
    try:
        out: MutableMapping[Any, Any] = type(template)()  # preserve mapping type where possible
        for key, value in items:
            out[key] = value
        return out
    except Exception:
        # Read-only mappings (e.g. mappingproxy) or mapping types whose
        # constructor requires arguments cannot be rebuilt; a logging helper
        # must not raise for them, so degrade to a plain dict.
        return dict(items)


def _rebuild_sequence(template: Any, items: list, splat: bool) -> Any:
    """Build a sequence of ``type(template)`` from *items*, or a list/tuple if that fails.

    *splat* is True for namedtuples, whose constructor takes one positional
    argument per field instead of a single iterable.
    """
    kind = type(template)
    try:
        return kind(*items) if splat else kind(items)
    except Exception:
        # e.g. ``range`` or custom sequences with a different constructor.
        return tuple(items) if isinstance(template, tuple) else list(items)


def _sanitize(data: Any, keys: frozenset, redaction: str) -> Any:
    """Recursive worker for ``sanitize_for_logging``; *keys* is already lower-cased."""
    if BaseModel is not None and isinstance(data, BaseModel):  # type: ignore[arg-type]
        # Prefer the pydantic v2 API, fall back to the v1 spelling.
        dump = getattr(data, "model_dump", None) or getattr(data, "dict", None)
        try:
            dumped = dump()
        except Exception:
            # Fail closed: if the model cannot be converted to a dict we cannot
            # tell which fields are secret, so never hand the raw model back.
            return redaction
        return _sanitize(dumped, keys, redaction)

    # Dict-like
    if _is_mapping(data):
        cleaned_items = [
            (k, redaction if str(k).lower() in keys else _sanitize(v, keys, redaction))
            for k, v in data.items()
        ]
        return _rebuild_mapping(data, cleaned_items)

    # List/Tuple/Sequence
    if _is_sequence(data):
        # Namedtuples carry field names, so they are redacted by name like a mapping.
        field_names = getattr(data, "_fields", None) if isinstance(data, tuple) else None
        if field_names:
            cleaned = [
                redaction if str(name).lower() in keys else _sanitize(value, keys, redaction)
                for name, value in zip(field_names, data)
            ]
        else:
            cleaned = [_sanitize(value, keys, redaction) for value in data]
        return _rebuild_sequence(data, cleaned, bool(field_names))

    # Fallback: return as-is
    return data


def sanitize_for_logging(
    data: Any,
    sensitive_keys: Set[str] | None = None,
    redaction: str = _REDACTION,
) -> Any:
    """
    Recursively sanitize common secret fields from dicts, lists, tuples, and Pydantic models.

    - Key comparison is case-insensitive and matches exact keys only.
    - Does not mutate input; containers are rebuilt, while non-container leaf
      values are returned by reference (they are not copied).
    - The container type is preserved where it can be re-created; otherwise a
      plain ``dict``/``list``/``tuple`` is returned instead of raising.
    - Namedtuple fields are redacted by field name.
    - For Pydantic BaseModel instances, uses model_dump() before redaction; if
      the model cannot be dumped, the redaction marker itself is returned.

    Parameters
    ----------
    data : Any
        Object to sanitize.
    sensitive_keys : Set[str] | None
        Key names to redact. ``None`` or an empty collection selects the
        built-in default list.
    redaction : str
        Replacement value stored for every sensitive key.

    Returns
    -------
    Any
        A sanitized structure that is safe to format into log messages.
    """
    # Lower-case the key set once instead of at every recursion level.
    keys = frozenset(str(k).lower() for k in (sensitive_keys or _DEFAULT_SENSITIVE_KEYS))
    return _sanitize(data, keys, redaction)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
nv_ingest_api/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
2
|
-
nv_ingest_api/interface/__init__.py,sha256=
|
|
2
|
+
nv_ingest_api/interface/__init__.py,sha256=KPYjTT0p5I3aCGg6Bzs8igQQuXRY4mmUUVaK0F0uziI,10497
|
|
3
3
|
nv_ingest_api/interface/extract.py,sha256=o9OdoWxYsj-O4HsDe6wWbyd69OAueb2rlMtKSzOrKZo,38743
|
|
4
4
|
nv_ingest_api/interface/mutate.py,sha256=eZkd3sbHEJQiEPJyMbhewlPxQNMnL_Xur15icclnb-U,5934
|
|
5
5
|
nv_ingest_api/interface/store.py,sha256=aR3Cf19lq9Yo9AHlAy1VVcrOP2dgyN01yYhwxyTprkQ,8207
|
|
@@ -35,7 +35,7 @@ nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py,sha256=IVbNcH_phMiRS
|
|
|
35
35
|
nv_ingest_api/internal/extract/pdf/engines/pdfium.py,sha256=SKmias2iZmAE6Q8WXxmFEjvLOZy-vXRoaRIPpi7Tuhs,22962
|
|
36
36
|
nv_ingest_api/internal/extract/pdf/engines/tika.py,sha256=6GyR2l6EsgNZl9jnYDXLeKNK9Fj2Mw9y2UWDq-eSkOc,3169
|
|
37
37
|
nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py,sha256=jrv2B4VZAH4PevAQrFz965qz8UyXq3rViiOTbGLejec,14908
|
|
38
|
-
nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py,sha256=
|
|
38
|
+
nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py,sha256=uTPTUTWQsGM1oeTUo49_hzwC5Yy9iEokrnS3z3WvtIo,5988
|
|
39
39
|
nv_ingest_api/internal/extract/pptx/__init__.py,sha256=HIHfzSig66GT0Uk8qsGBm_f13fKYcPtItBicRUWOOVA,183
|
|
40
40
|
nv_ingest_api/internal/extract/pptx/pptx_extractor.py,sha256=o-0P2dDyRFW37uQi_lKk6-eFozTcZvbq-2Y4I0EBMIY,7749
|
|
41
41
|
nv_ingest_api/internal/extract/pptx/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -69,22 +69,22 @@ nv_ingest_api/internal/primitives/tracing/logging.py,sha256=SSzIgS7afLH-e1C7VagY
|
|
|
69
69
|
nv_ingest_api/internal/primitives/tracing/tagging.py,sha256=xU534rb94uKnsSu0_DzyZcCSkIpa5SWTMxX7NSA3HoE,11671
|
|
70
70
|
nv_ingest_api/internal/schemas/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
71
71
|
nv_ingest_api/internal/schemas/extract/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
72
|
-
nv_ingest_api/internal/schemas/extract/extract_audio_schema.py,sha256=
|
|
73
|
-
nv_ingest_api/internal/schemas/extract/extract_chart_schema.py,sha256=
|
|
74
|
-
nv_ingest_api/internal/schemas/extract/extract_docx_schema.py,sha256=
|
|
72
|
+
nv_ingest_api/internal/schemas/extract/extract_audio_schema.py,sha256=3cjLcw5zFUb7WNbGPLDJN2KukF_hoOM7PE33UHici6w,3873
|
|
73
|
+
nv_ingest_api/internal/schemas/extract/extract_chart_schema.py,sha256=PZFJPLrLs8k5I5ufnp0XWrBjmbQVkkaxjb-xq-2rn2Q,4317
|
|
74
|
+
nv_ingest_api/internal/schemas/extract/extract_docx_schema.py,sha256=Bafw6lIXLS2PcEpU82D4Vb0OPD_FvGSr546IedsfR8o,3795
|
|
75
75
|
nv_ingest_api/internal/schemas/extract/extract_html_schema.py,sha256=lazpONTGZ6Fl420BGBAr6rogFGtlzBiZTc1uA694OIs,841
|
|
76
|
-
nv_ingest_api/internal/schemas/extract/extract_image_schema.py,sha256=
|
|
77
|
-
nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py,sha256=
|
|
78
|
-
nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py,sha256=
|
|
79
|
-
nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py,sha256=
|
|
80
|
-
nv_ingest_api/internal/schemas/extract/extract_table_schema.py,sha256=
|
|
76
|
+
nv_ingest_api/internal/schemas/extract/extract_image_schema.py,sha256=hXiHIKIZS2qb4u8g10m_S5CpeAzHvH3tSB_He5LYOfU,3800
|
|
77
|
+
nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py,sha256=Yb580-k1oYXanWMA17u0LGO5-AzUP80j6aTPGpjn920,4007
|
|
78
|
+
nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py,sha256=TTFzlPG5r1QFO2B5VWBPF69IXP1eQKrg16wo49hHDV0,6650
|
|
79
|
+
nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py,sha256=6KomEL6wdhpZVnE2SiOeZJNhyfHIQ2sARdfm0R16uCM,3795
|
|
80
|
+
nv_ingest_api/internal/schemas/extract/extract_table_schema.py,sha256=npBEGXkVaShOiHrc8FQ-25kHhPRD8WB6dRn-T5TScKc,3969
|
|
81
81
|
nv_ingest_api/internal/schemas/message_brokers/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
|
|
82
82
|
nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py,sha256=4xTSFE_vH7yZE9RRJRflFAG9hNXIaF6K020M_xA7ylw,1351
|
|
83
83
|
nv_ingest_api/internal/schemas/message_brokers/request_schema.py,sha256=LZX_wXDxTamVFqTQs2Yd8uvWyPE5mddHAWSU4PtfEIQ,966
|
|
84
84
|
nv_ingest_api/internal/schemas/message_brokers/response_schema.py,sha256=4b275HlzBSzpmuE2wdoeaGKPCdKki3wuWldtRIfrj8w,727
|
|
85
85
|
nv_ingest_api/internal/schemas/meta/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
86
86
|
nv_ingest_api/internal/schemas/meta/base_model_noext.py,sha256=8hXU1uuiqZ6t8EsoZ8vlC5EFf2zSZrKEX133FcfZMwI,316
|
|
87
|
-
nv_ingest_api/internal/schemas/meta/ingest_job_schema.py,sha256=
|
|
87
|
+
nv_ingest_api/internal/schemas/meta/ingest_job_schema.py,sha256=cIpoesvIs0dR6s8dGjGHL246k5kf7hDmdhA48i8Si7s,10253
|
|
88
88
|
nv_ingest_api/internal/schemas/meta/metadata_schema.py,sha256=VnAzkSFat_ckI19mlwQTlFrvP6EZVCwyNl9bt51b8oU,7193
|
|
89
89
|
nv_ingest_api/internal/schemas/meta/udf.py,sha256=GgzqbZOlipQgMpDhbXLqbF8xrHenj_hMNqhR_P-1ynw,779
|
|
90
90
|
nv_ingest_api/internal/schemas/mutate/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
@@ -93,9 +93,9 @@ nv_ingest_api/internal/schemas/store/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQu
|
|
|
93
93
|
nv_ingest_api/internal/schemas/store/store_embedding_schema.py,sha256=tdKeiraim9CDL9htgp4oUSCoPMoO5PrHBnlXqDyCpMw,956
|
|
94
94
|
nv_ingest_api/internal/schemas/store/store_image_schema.py,sha256=p2LGij9i6sG6RYmsfdiQOiWIc2j-POjxYrNuMrp3ELU,1010
|
|
95
95
|
nv_ingest_api/internal/schemas/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
96
|
-
nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py,sha256=
|
|
96
|
+
nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py,sha256=fRMRwcWP-L8sfv2enNDt_W_CL0eC2i3b_1VCCtmr1K8,1188
|
|
97
97
|
nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py,sha256=31ThI5fr0yyENeJeE1xMAA-pxk1QVJLwM842zMate_k,429
|
|
98
|
-
nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py,sha256=
|
|
98
|
+
nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py,sha256=rzdhRANCqG9mOEoLargznuBwj1-MbEQUu2LDVi5vl50,1616
|
|
99
99
|
nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py,sha256=D9K8tvu-tkEBQkZo7uuRzgrHdGyM3ZcNycHbHy5HV2E,791
|
|
100
100
|
nv_ingest_api/internal/store/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
101
101
|
nv_ingest_api/internal/store/embed_text_upload.py,sha256=maxb4FPsBvWgvlrjAPEBlRZEFdJX5NxPG-p8kUbzV7I,9898
|
|
@@ -135,6 +135,7 @@ nv_ingest_api/util/introspection/class_inspect.py,sha256=sEYe37ICHdhXxSbD0JTCDg3
|
|
|
135
135
|
nv_ingest_api/util/introspection/function_inspect.py,sha256=_yCUUT1x3tLCsbXmUpTv_O8Qi6af4NiCpJ1qGfcksvw,2066
|
|
136
136
|
nv_ingest_api/util/logging/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
137
137
|
nv_ingest_api/util/logging/configuration.py,sha256=05KR3LOS-PCqU-Io__iiKG_Ds730eKxciklFfNeId3w,3126
|
|
138
|
+
nv_ingest_api/util/logging/sanitize.py,sha256=-dIbmvLTevrTRd18QKUQQMV4hBk6pStWP_7_VtDDctg,2584
|
|
138
139
|
nv_ingest_api/util/message_brokers/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
139
140
|
nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py,sha256=WaQ3CWIpIKWEivT5kL-bkmzcSQKLGFNFHdXHUJjqZFs,325
|
|
140
141
|
nv_ingest_api/util/message_brokers/simple_message_broker/broker.py,sha256=PekxaxVcAa9k1wgUtozlr04SW3sAeqYJE-wdVBZf9eo,17264
|
|
@@ -161,8 +162,8 @@ nv_ingest_api/util/string_processing/configuration.py,sha256=2HS08msccuPCT0fn_jf
|
|
|
161
162
|
nv_ingest_api/util/string_processing/yaml.py,sha256=6SW2O6wbXRhGbhETMbtXjYCZn53HeCNOP6a96AaxlHs,1454
|
|
162
163
|
nv_ingest_api/util/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
163
164
|
nv_ingest_api/util/system/hardware_info.py,sha256=1UFM8XE6M3pgQcpbVsCsqDQ7Dj-zzptL-XRE-DEu9UA,27213
|
|
164
|
-
nv_ingest_api-2025.8.
|
|
165
|
-
nv_ingest_api-2025.8.
|
|
166
|
-
nv_ingest_api-2025.8.
|
|
167
|
-
nv_ingest_api-2025.8.
|
|
168
|
-
nv_ingest_api-2025.8.
|
|
165
|
+
nv_ingest_api-2025.8.21.dev20250821.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
166
|
+
nv_ingest_api-2025.8.21.dev20250821.dist-info/METADATA,sha256=GO4G7PAWoOa3E0NQJKagm6Ir3ph9VmiSHl4MwpsnlYw,13947
|
|
167
|
+
nv_ingest_api-2025.8.21.dev20250821.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
168
|
+
nv_ingest_api-2025.8.21.dev20250821.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
|
|
169
|
+
nv_ingest_api-2025.8.21.dev20250821.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|