nv-ingest-api 2025.8.19.dev20250819__py3-none-any.whl → 2025.8.21.dev20250821__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

Files changed (21)
  1. nv_ingest_api/interface/__init__.py +14 -11
  2. nv_ingest_api/internal/extract/pdf/engines/llama.py +4 -1
  3. nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +3 -2
  4. nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +3 -3
  5. nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +2 -2
  6. nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +2 -2
  7. nv_ingest_api/internal/schemas/extract/extract_image_schema.py +2 -2
  8. nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +2 -2
  9. nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +3 -3
  10. nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +2 -2
  11. nv_ingest_api/internal/schemas/extract/extract_table_schema.py +2 -2
  12. nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +4 -4
  13. nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +20 -2
  14. nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +15 -2
  15. nv_ingest_api/internal/transform/embed_text.py +4 -1
  16. nv_ingest_api/util/logging/sanitize.py +84 -0
  17. {nv_ingest_api-2025.8.19.dev20250819.dist-info → nv_ingest_api-2025.8.21.dev20250821.dist-info}/METADATA +1 -1
  18. {nv_ingest_api-2025.8.19.dev20250819.dist-info → nv_ingest_api-2025.8.21.dev20250821.dist-info}/RECORD +21 -20
  19. {nv_ingest_api-2025.8.19.dev20250819.dist-info → nv_ingest_api-2025.8.21.dev20250821.dist-info}/WHEEL +0 -0
  20. {nv_ingest_api-2025.8.19.dev20250819.dist-info → nv_ingest_api-2025.8.21.dev20250821.dist-info}/licenses/LICENSE +0 -0
  21. {nv_ingest_api-2025.8.19.dev20250819.dist-info → nv_ingest_api-2025.8.21.dev20250821.dist-info}/top_level.txt +0 -0
@@ -11,6 +11,7 @@ from typing import Dict, Any, Optional, List
11
11
  from pydantic import BaseModel
12
12
 
13
13
  from nv_ingest_api.internal.schemas.extract.extract_pdf_schema import PDFiumConfigSchema, NemoRetrieverParseConfigSchema
14
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
14
15
 
15
16
  logger = logging.getLogger(__name__)
16
17
 
@@ -180,29 +181,31 @@ def extraction_interface_relay_constructor(api_fn, task_keys: Optional[List[str]
180
181
  if extractor_schema is None:
181
182
  extractor_schema = {f"{extract_method}_config": extraction_config_dict}
182
183
 
183
- # Log the task and extractor configurations for debugging
184
+ # Log the task and extractor configurations for debugging (sanitized)
184
185
  logger.debug("\n" + "=" * 80)
185
186
  logger.debug(f"DEBUG - API Function: {api_fn.__name__}")
186
187
  logger.debug(f"DEBUG - Extract Method: {extract_method}")
187
188
  logger.debug("-" * 80)
188
189
 
189
- # Format the task config as a string and log it
190
- task_config_str = pprint.pformat(task_config, width=100, sort_dicts=False)
191
- logger.debug(f"DEBUG - Task Config:\n{task_config_str}")
190
+ # Sanitize and format the task config as a string and log it
191
+ sanitized_task_config = sanitize_for_logging(task_config)
192
+ task_config_str = pprint.pformat(sanitized_task_config, width=100, sort_dicts=False)
193
+ logger.debug(f"DEBUG - Task Config (sanitized):\n{task_config_str}")
192
194
  logger.debug("-" * 80)
193
195
 
194
- # Format the extractor config as a string and log it
196
+ # Sanitize and format the extractor config as a string and log it
195
197
  if hasattr(extractor_schema, "model_dump"):
196
- extractor_config_str = pprint.pformat(extractor_schema.model_dump(), width=100, sort_dicts=False)
198
+ sanitized_extractor_config = sanitize_for_logging(extractor_schema.model_dump())
197
199
  else:
198
- extractor_config_str = pprint.pformat(extractor_schema, width=100, sort_dicts=False)
200
+ sanitized_extractor_config = sanitize_for_logging(extractor_schema)
201
+ extractor_config_str = pprint.pformat(sanitized_extractor_config, width=100, sort_dicts=False)
199
202
  logger.debug(f"DEBUG - Extractor Config Type: {type(extractor_schema)}")
200
- logger.debug(f"DEBUG - Extractor Config:\n{extractor_config_str}")
203
+ logger.debug(f"DEBUG - Extractor Config (sanitized):\n{extractor_config_str}")
201
204
  logger.debug("=" * 80 + "\n")
202
205
 
203
- # Call the backend API function.
204
- pprint.pprint(task_config)
205
- pprint.pprint(extractor_schema)
206
+ # Call the backend API function. Print sanitized configs for any debug consumers of stdout.
207
+ pprint.pprint(sanitized_task_config)
208
+ pprint.pprint(sanitized_extractor_config)
206
209
  result = api_fn(ledger, task_config, extractor_schema, execution_trace_log)
207
210
 
208
211
  # If the result is a tuple, return only the first element
@@ -193,7 +193,10 @@ async def async_llama_parse(
193
193
  A string of extracted text.
194
194
  """
195
195
  base_url = "https://api.cloud.llamaindex.ai/api/parsing"
196
- headers = {"Authorization": f"Bearer {api_key}"}
196
+ # Normalize in case api_key contains only whitespace; avoid sending an empty bearer token
197
+ _token = (api_key or "").strip()
198
+ _auth_value = f"Bearer {_token}" if _token else "Bearer <no key provided>"
199
+ headers = {"Authorization": _auth_value}
197
200
  mime_type = "application/pdf"
198
201
 
199
202
  try:
@@ -11,6 +11,7 @@ from typing import Any
11
11
  from typing import Dict
12
12
  from typing import List
13
13
  from typing import Optional
14
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
14
15
 
15
16
  import pandas as pd
16
17
  from nv_ingest_api.internal.extract.pdf.engines import adobe_extractor
@@ -131,7 +132,7 @@ def _orchestrate_row_extraction(
131
132
  method_config = extractor_config[config_key]
132
133
  else:
133
134
  # If no matching config is found, log a warning but don't fail
134
- logger.warning(f"No {config_key} found in extractor_config: {extractor_config}")
135
+ logger.warning(f"No {config_key} found in extractor_config: {sanitize_for_logging(extractor_config)}")
135
136
  method_config = None
136
137
 
137
138
  # Add the method-specific config to the parameters if available
@@ -141,7 +142,7 @@ def _orchestrate_row_extraction(
141
142
 
142
143
  # The resulting parameters constitute the complete extractor_config
143
144
  extractor_config = params
144
- logger.debug(f"Final extractor_config: {extractor_config}")
145
+ logger.debug(f"Final extractor_config: {sanitize_for_logging(extractor_config)}")
145
146
 
146
147
  result = _work_extract_pdf(
147
148
  pdf_stream=pdf_stream,
@@ -7,7 +7,7 @@ import logging
7
7
  from typing import Optional
8
8
  from typing import Tuple
9
9
 
10
- from pydantic import BaseModel
10
+ from pydantic import BaseModel, Field
11
11
  from pydantic import root_validator
12
12
 
13
13
  logger = logging.getLogger(__name__)
@@ -42,12 +42,12 @@ class AudioConfigSchema(BaseModel):
42
42
  Pydantic config option to forbid extra fields.
43
43
  """
44
44
 
45
- auth_token: Optional[str] = None
45
+ auth_token: Optional[str] = Field(default=None, repr=False)
46
46
  audio_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
47
47
  audio_infer_protocol: Optional[str] = None
48
48
  function_id: Optional[str] = None
49
49
  use_ssl: Optional[bool] = None
50
- ssl_cert: Optional[str] = None
50
+ ssl_cert: Optional[str] = Field(default=None, repr=False)
51
51
  segment_audio: Optional[bool] = None
52
52
 
53
53
  @root_validator(pre=True)
@@ -6,7 +6,7 @@ import logging
6
6
  from typing import Optional
7
7
  from typing import Tuple
8
8
 
9
- from pydantic import field_validator, model_validator, ConfigDict, BaseModel
9
+ from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
10
10
 
11
11
  logger = logging.getLogger(__name__)
12
12
 
@@ -44,7 +44,7 @@ class ChartExtractorConfigSchema(BaseModel):
44
44
  Pydantic config option to forbid extra fields.
45
45
  """
46
46
 
47
- auth_token: Optional[str] = None
47
+ auth_token: Optional[str] = Field(default=None, repr=False)
48
48
 
49
49
  yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
50
50
  yolox_infer_protocol: str = ""
@@ -7,7 +7,7 @@ import logging
7
7
  from typing import Optional
8
8
  from typing import Tuple
9
9
 
10
- from pydantic import model_validator, ConfigDict, BaseModel
10
+ from pydantic import model_validator, ConfigDict, BaseModel, Field
11
11
 
12
12
  logger = logging.getLogger(__name__)
13
13
 
@@ -41,7 +41,7 @@ class DocxConfigSchema(BaseModel):
41
41
  Pydantic config option to forbid extra fields.
42
42
  """
43
43
 
44
- auth_token: Optional[str] = None
44
+ auth_token: Optional[str] = Field(default=None, repr=False)
45
45
 
46
46
  yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
47
47
  yolox_infer_protocol: str = ""
@@ -7,7 +7,7 @@ import logging
7
7
  from typing import Optional
8
8
  from typing import Tuple
9
9
 
10
- from pydantic import model_validator, ConfigDict, BaseModel
10
+ from pydantic import model_validator, ConfigDict, BaseModel, Field
11
11
 
12
12
  logger = logging.getLogger(__name__)
13
13
 
@@ -41,7 +41,7 @@ class ImageConfigSchema(BaseModel):
41
41
  Pydantic config option to forbid extra fields.
42
42
  """
43
43
 
44
- auth_token: Optional[str] = None
44
+ auth_token: Optional[str] = Field(default=None, repr=False)
45
45
 
46
46
  yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
47
47
  yolox_infer_protocol: str = ""
@@ -6,7 +6,7 @@ import logging
6
6
  from typing import Optional
7
7
  from typing import Tuple
8
8
 
9
- from pydantic import field_validator, model_validator, ConfigDict, BaseModel
9
+ from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
10
10
 
11
11
  logger = logging.getLogger(__name__)
12
12
 
@@ -40,7 +40,7 @@ class InfographicExtractorConfigSchema(BaseModel):
40
40
  Pydantic config option to forbid extra fields.
41
41
  """
42
42
 
43
- auth_token: Optional[str] = None
43
+ auth_token: Optional[str] = Field(default=None, repr=False)
44
44
 
45
45
  ocr_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
46
46
  ocr_infer_protocol: str = ""
@@ -7,7 +7,7 @@ import logging
7
7
  from typing import Optional
8
8
  from typing import Tuple
9
9
 
10
- from pydantic import model_validator, ConfigDict, BaseModel
10
+ from pydantic import model_validator, ConfigDict, BaseModel, Field
11
11
 
12
12
  logger = logging.getLogger(__name__)
13
13
 
@@ -41,7 +41,7 @@ class PDFiumConfigSchema(BaseModel):
41
41
  Pydantic config option to forbid extra fields.
42
42
  """
43
43
 
44
- auth_token: Optional[str] = None
44
+ auth_token: Optional[str] = Field(default=None, repr=False)
45
45
 
46
46
  yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
47
47
  yolox_infer_protocol: str = ""
@@ -123,7 +123,7 @@ class NemoRetrieverParseConfigSchema(BaseModel):
123
123
  Pydantic config option to forbid extra fields.
124
124
  """
125
125
 
126
- auth_token: Optional[str] = None
126
+ auth_token: Optional[str] = Field(default=None, repr=False)
127
127
 
128
128
  yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
129
129
  yolox_infer_protocol: str = ""
@@ -7,7 +7,7 @@ import logging
7
7
  from typing import Optional
8
8
  from typing import Tuple
9
9
 
10
- from pydantic import model_validator, ConfigDict, BaseModel
10
+ from pydantic import model_validator, ConfigDict, BaseModel, Field
11
11
 
12
12
  logger = logging.getLogger(__name__)
13
13
 
@@ -41,7 +41,7 @@ class PPTXConfigSchema(BaseModel):
41
41
  Pydantic config option to forbid extra fields.
42
42
  """
43
43
 
44
- auth_token: Optional[str] = None
44
+ auth_token: Optional[str] = Field(default=None, repr=False)
45
45
 
46
46
  yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
47
47
  yolox_infer_protocol: str = ""
@@ -7,7 +7,7 @@ import logging
7
7
  from typing import Optional
8
8
  from typing import Tuple
9
9
 
10
- from pydantic import field_validator, model_validator, ConfigDict, BaseModel
10
+ from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
11
11
 
12
12
 
13
13
  logger = logging.getLogger(__name__)
@@ -42,7 +42,7 @@ class TableExtractorConfigSchema(BaseModel):
42
42
  Pydantic config option to forbid extra fields.
43
43
  """
44
44
 
45
- auth_token: Optional[str] = None
45
+ auth_token: Optional[str] = Field(default=None, repr=False)
46
46
 
47
47
  yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
48
48
  yolox_infer_protocol: str = ""
@@ -73,7 +73,7 @@ class IngestTaskStoreSchema(BaseModelNoExt):
73
73
 
74
74
  # Captioning: All fields are optional and override default parameters.
75
75
  class IngestTaskCaptionSchema(BaseModelNoExt):
76
- api_key: Optional[str] = None
76
+ api_key: Optional[str] = Field(default=None, repr=False)
77
77
  endpoint_url: Optional[str] = None
78
78
  prompt: Optional[str] = None
79
79
  model_name: Optional[str] = None
@@ -105,7 +105,7 @@ class IngestTaskDedupSchema(BaseModelNoExt):
105
105
  class IngestTaskEmbedSchema(BaseModelNoExt):
106
106
  endpoint_url: Optional[str] = None
107
107
  model_name: Optional[str] = None
108
- api_key: Optional[str] = None
108
+ api_key: Optional[str] = Field(default=None, repr=False)
109
109
  filter_errors: bool = False
110
110
  text_elements_modality: Optional[str] = None
111
111
  image_elements_modality: Optional[str] = None
@@ -121,13 +121,13 @@ class IngestTaskVdbUploadSchema(BaseModelNoExt):
121
121
 
122
122
 
123
123
  class IngestTaskAudioExtraction(BaseModelNoExt):
124
- auth_token: Optional[str] = None
124
+ auth_token: Optional[str] = Field(default=None, repr=False)
125
125
  grpc_endpoint: Optional[str] = None
126
126
  http_endpoint: Optional[str] = None
127
127
  infer_protocol: Optional[str] = None
128
128
  function_id: Optional[str] = None
129
129
  use_ssl: Optional[bool] = None
130
- ssl_cert: Optional[str] = None
130
+ ssl_cert: Optional[str] = Field(default=None, repr=False)
131
131
  segment_audio: Optional[bool] = None
132
132
 
133
133
 
@@ -3,13 +3,31 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
 
6
- from pydantic import ConfigDict, BaseModel
6
+ from pydantic import ConfigDict, BaseModel, model_validator, field_validator, Field
7
7
 
8
8
 
9
9
  class ImageCaptionExtractionSchema(BaseModel):
10
- api_key: str = "api_key"
10
+ api_key: str = Field(default="", repr=False)
11
11
  endpoint_url: str = "https://integrate.api.nvidia.com/v1/chat/completions"
12
12
  prompt: str = "Caption the content of this image:"
13
13
  model_name: str = "nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
14
14
  raise_on_failure: bool = False
15
15
  model_config = ConfigDict(extra="forbid")
16
+
17
+ @field_validator("api_key", mode="before")
18
+ @classmethod
19
+ def _coerce_api_key_none(cls, v):
20
+ return "" if v is None else v
21
+
22
+ @model_validator(mode="before")
23
+ @classmethod
24
+ def _coerce_none_to_empty(cls, values):
25
+ """Allow None for string fields where empty string is acceptable.
26
+
27
+ Specifically, convert api_key=None to api_key="" so validation passes
28
+ when no API key is supplied.
29
+ """
30
+ if isinstance(values, dict):
31
+ if values.get("api_key") is None:
32
+ values["api_key"] = ""
33
+ return values
@@ -5,7 +5,7 @@
5
5
 
6
6
  import logging
7
7
 
8
- from pydantic import ConfigDict, BaseModel, Field
8
+ from pydantic import ConfigDict, BaseModel, Field, model_validator, field_validator
9
9
 
10
10
  from nv_ingest_api.util.logging.configuration import LogLevel
11
11
 
@@ -13,7 +13,7 @@ logger = logging.getLogger(__name__)
13
13
 
14
14
 
15
15
  class TextEmbeddingSchema(BaseModel):
16
- api_key: str = Field(default="api_key")
16
+ api_key: str = Field(default="", repr=False)
17
17
  batch_size: int = Field(default=4)
18
18
  embedding_model: str = Field(default="nvidia/llama-3.2-nv-embedqa-1b-v2")
19
19
  embedding_nim_endpoint: str = Field(default="http://embedding:8000/v1")
@@ -28,3 +28,16 @@ class TextEmbeddingSchema(BaseModel):
28
28
  audio_elements_modality: str = Field(default="text")
29
29
 
30
30
  model_config = ConfigDict(extra="forbid")
31
+
32
+ @field_validator("api_key", mode="before")
33
+ @classmethod
34
+ def _coerce_api_key_none(cls, v):
35
+ return "" if v is None else v
36
+
37
+ @model_validator(mode="before")
38
+ @classmethod
39
+ def _coerce_none_to_empty(cls, values):
40
+ """Convert api_key=None to empty string so validation passes when key is omitted."""
41
+ if isinstance(values, dict) and values.get("api_key") is None:
42
+ values["api_key"] = ""
43
+ return values
@@ -75,8 +75,11 @@ def _make_async_request(
75
75
  response = {}
76
76
 
77
77
  try:
78
+ # Normalize API key to avoid sending an empty bearer token via SDK internals
79
+ _token = (api_key or "").strip()
80
+ _api_key = _token if _token else "<no key provided>"
78
81
  client = OpenAI(
79
- api_key=api_key,
82
+ api_key=_api_key,
80
83
  base_url=embedding_nim_endpoint,
81
84
  )
82
85
 
@@ -0,0 +1,84 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any, Mapping, MutableMapping, Sequence, Set
8
+
9
+ try:
10
+ # Pydantic is optional at runtime for this helper; import if available
11
+ from pydantic import BaseModel # type: ignore
12
+ except Exception: # pragma: no cover - pydantic always present in this repo
13
+ BaseModel = None # type: ignore
14
+
15
+
16
+ _DEFAULT_SENSITIVE_KEYS: Set[str] = {
17
+ "access_token",
18
+ "api_key",
19
+ "authorization",
20
+ "auth_token",
21
+ "client_secret",
22
+ "hf_access_token",
23
+ "hugging_face_access_token",
24
+ "password",
25
+ "refresh_token",
26
+ "secret",
27
+ "ssl_cert",
28
+ "x-api-key",
29
+ }
30
+
31
+ _REDACTION = "***REDACTED***"
32
+
33
+
34
+ def _is_mapping(obj: Any) -> bool:
35
+ try:
36
+ return isinstance(obj, Mapping)
37
+ except Exception:
38
+ return False
39
+
40
+
41
+ def _is_sequence(obj: Any) -> bool:
42
+ # Exclude strings/bytes from sequences we want to traverse
43
+ return isinstance(obj, Sequence) and not isinstance(obj, (str, bytes, bytearray))
44
+
45
+
46
+ def sanitize_for_logging(
47
+ data: Any,
48
+ sensitive_keys: Set[str] | None = None,
49
+ redaction: str = _REDACTION,
50
+ ) -> Any:
51
+ """
52
+ Recursively sanitize common secret fields from dicts, lists, tuples, and Pydantic models.
53
+
54
+ - Key comparison is case-insensitive and matches exact keys only.
55
+ - Does not mutate input; returns a sanitized deep copy.
56
+ - For Pydantic BaseModel instances, uses model_dump() before redaction.
57
+ """
58
+ keys = {k.lower() for k in (sensitive_keys or _DEFAULT_SENSITIVE_KEYS)}
59
+
60
+ # Handle Pydantic models without importing pydantic at module import time
61
+ if BaseModel is not None and isinstance(data, BaseModel): # type: ignore[arg-type]
62
+ try:
63
+ return sanitize_for_logging(data.model_dump(), keys, redaction)
64
+ except Exception:
65
+ # Fall through and try generic handling below
66
+ pass
67
+
68
+ # Dict-like
69
+ if _is_mapping(data):
70
+ out: MutableMapping[str, Any] = type(data)() # preserve mapping type where possible
71
+ for k, v in data.items(): # type: ignore[assignment]
72
+ key_lower = str(k).lower()
73
+ if key_lower in keys:
74
+ out[k] = redaction
75
+ else:
76
+ out[k] = sanitize_for_logging(v, keys, redaction)
77
+ return out
78
+
79
+ # List/Tuple/Sequence
80
+ if _is_sequence(data):
81
+ return type(data)(sanitize_for_logging(v, keys, redaction) for v in data)
82
+
83
+ # Fallback: return as-is
84
+ return data
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.8.19.dev20250819
3
+ Version: 2025.8.21.dev20250821
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -1,5 +1,5 @@
1
1
  nv_ingest_api/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
2
- nv_ingest_api/interface/__init__.py,sha256=ltWlfmtCewHSRK4B7DF__QvlSUPuliz58JEcEIeIgI0,10134
2
+ nv_ingest_api/interface/__init__.py,sha256=KPYjTT0p5I3aCGg6Bzs8igQQuXRY4mmUUVaK0F0uziI,10497
3
3
  nv_ingest_api/interface/extract.py,sha256=o9OdoWxYsj-O4HsDe6wWbyd69OAueb2rlMtKSzOrKZo,38743
4
4
  nv_ingest_api/interface/mutate.py,sha256=eZkd3sbHEJQiEPJyMbhewlPxQNMnL_Xur15icclnb-U,5934
5
5
  nv_ingest_api/interface/store.py,sha256=aR3Cf19lq9Yo9AHlAy1VVcrOP2dgyN01yYhwxyTprkQ,8207
@@ -30,12 +30,12 @@ nv_ingest_api/internal/extract/pdf/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusX
30
30
  nv_ingest_api/internal/extract/pdf/pdf_extractor.py,sha256=CxtWaD6mql9MEqSdk2CfSQ9T-Bn87beBkCOuGGjxGt8,2934
31
31
  nv_ingest_api/internal/extract/pdf/engines/__init__.py,sha256=u4GnAZmDKRl0RwYGIRiozIRw70Kybw3A72-lcKFeoTI,582
32
32
  nv_ingest_api/internal/extract/pdf/engines/adobe.py,sha256=VT0dEqkU-y2uGkaCqxtKYov_Q8R1028UQVBchgMLca4,17466
33
- nv_ingest_api/internal/extract/pdf/engines/llama.py,sha256=PpKTqS8jGHBV6mKLGZWwjpfT8ga6Fy8ffrvL-gPAf2c,8182
33
+ nv_ingest_api/internal/extract/pdf/engines/llama.py,sha256=MwzM-n2tu0FHM0wDe_0mONLlzHrPte7EOTuPtzCh7Zs,8384
34
34
  nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py,sha256=IVbNcH_phMiRSxnkZ04pGfQrPJ-x1zVR3hXyhxv7juc,22977
35
35
  nv_ingest_api/internal/extract/pdf/engines/pdfium.py,sha256=SKmias2iZmAE6Q8WXxmFEjvLOZy-vXRoaRIPpi7Tuhs,22962
36
36
  nv_ingest_api/internal/extract/pdf/engines/tika.py,sha256=6GyR2l6EsgNZl9jnYDXLeKNK9Fj2Mw9y2UWDq-eSkOc,3169
37
37
  nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py,sha256=jrv2B4VZAH4PevAQrFz965qz8UyXq3rViiOTbGLejec,14908
38
- nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py,sha256=4bvN6LsPksLicI6jM0JqbJFiOZNHEcuc8MVVW4XfgV8,5875
38
+ nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py,sha256=uTPTUTWQsGM1oeTUo49_hzwC5Yy9iEokrnS3z3WvtIo,5988
39
39
  nv_ingest_api/internal/extract/pptx/__init__.py,sha256=HIHfzSig66GT0Uk8qsGBm_f13fKYcPtItBicRUWOOVA,183
40
40
  nv_ingest_api/internal/extract/pptx/pptx_extractor.py,sha256=o-0P2dDyRFW37uQi_lKk6-eFozTcZvbq-2Y4I0EBMIY,7749
41
41
  nv_ingest_api/internal/extract/pptx/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -69,22 +69,22 @@ nv_ingest_api/internal/primitives/tracing/logging.py,sha256=SSzIgS7afLH-e1C7VagY
69
69
  nv_ingest_api/internal/primitives/tracing/tagging.py,sha256=xU534rb94uKnsSu0_DzyZcCSkIpa5SWTMxX7NSA3HoE,11671
70
70
  nv_ingest_api/internal/schemas/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
71
71
  nv_ingest_api/internal/schemas/extract/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
72
- nv_ingest_api/internal/schemas/extract/extract_audio_schema.py,sha256=W-nEBriqiNkjpaQ5AT_8LhtVXlW8AhlcftmoeQQtKAs,3812
73
- nv_ingest_api/internal/schemas/extract/extract_chart_schema.py,sha256=wDcvQ5XtOjIBGSWtNjQPiPtVKrSOYqbf2mnLrhfhue4,4283
74
- nv_ingest_api/internal/schemas/extract/extract_docx_schema.py,sha256=M2N7WjMNvSemHcJHWeNUD_kFG0wC5VE2W3K6SVrJqvA,3761
72
+ nv_ingest_api/internal/schemas/extract/extract_audio_schema.py,sha256=3cjLcw5zFUb7WNbGPLDJN2KukF_hoOM7PE33UHici6w,3873
73
+ nv_ingest_api/internal/schemas/extract/extract_chart_schema.py,sha256=PZFJPLrLs8k5I5ufnp0XWrBjmbQVkkaxjb-xq-2rn2Q,4317
74
+ nv_ingest_api/internal/schemas/extract/extract_docx_schema.py,sha256=Bafw6lIXLS2PcEpU82D4Vb0OPD_FvGSr546IedsfR8o,3795
75
75
  nv_ingest_api/internal/schemas/extract/extract_html_schema.py,sha256=lazpONTGZ6Fl420BGBAr6rogFGtlzBiZTc1uA694OIs,841
76
- nv_ingest_api/internal/schemas/extract/extract_image_schema.py,sha256=GC4xV8Z9TPLOuxlEtf2fbklSSp8ETGMrDpZgMQ02UwA,3766
77
- nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py,sha256=z42cs7w-U-IUCMGByp5e_iBUZ7KCl5vTIXkP64ty6gY,3973
78
- nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py,sha256=G9g1lEORmryUWTzDyZ0vHAuPnVMK7VaRx0E4xzmAw3Q,6589
79
- nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py,sha256=5dT0kv-Mmpe5KW-BZc1JOW3rUlgzVZI0rpB79NWytmw,3761
80
- nv_ingest_api/internal/schemas/extract/extract_table_schema.py,sha256=vd_1mf_LmQGvSTpQCuWr6ubsiav4TMhp_SpKGO-6RLc,3935
76
+ nv_ingest_api/internal/schemas/extract/extract_image_schema.py,sha256=hXiHIKIZS2qb4u8g10m_S5CpeAzHvH3tSB_He5LYOfU,3800
77
+ nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py,sha256=Yb580-k1oYXanWMA17u0LGO5-AzUP80j6aTPGpjn920,4007
78
+ nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py,sha256=TTFzlPG5r1QFO2B5VWBPF69IXP1eQKrg16wo49hHDV0,6650
79
+ nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py,sha256=6KomEL6wdhpZVnE2SiOeZJNhyfHIQ2sARdfm0R16uCM,3795
80
+ nv_ingest_api/internal/schemas/extract/extract_table_schema.py,sha256=npBEGXkVaShOiHrc8FQ-25kHhPRD8WB6dRn-T5TScKc,3969
81
81
  nv_ingest_api/internal/schemas/message_brokers/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
82
82
  nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py,sha256=4xTSFE_vH7yZE9RRJRflFAG9hNXIaF6K020M_xA7ylw,1351
83
83
  nv_ingest_api/internal/schemas/message_brokers/request_schema.py,sha256=LZX_wXDxTamVFqTQs2Yd8uvWyPE5mddHAWSU4PtfEIQ,966
84
84
  nv_ingest_api/internal/schemas/message_brokers/response_schema.py,sha256=4b275HlzBSzpmuE2wdoeaGKPCdKki3wuWldtRIfrj8w,727
85
85
  nv_ingest_api/internal/schemas/meta/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
86
86
  nv_ingest_api/internal/schemas/meta/base_model_noext.py,sha256=8hXU1uuiqZ6t8EsoZ8vlC5EFf2zSZrKEX133FcfZMwI,316
87
- nv_ingest_api/internal/schemas/meta/ingest_job_schema.py,sha256=En7wcLPB6hvfAXx2-xZM49wbJXmRX2Ckc0i6edqn21c,10145
87
+ nv_ingest_api/internal/schemas/meta/ingest_job_schema.py,sha256=cIpoesvIs0dR6s8dGjGHL246k5kf7hDmdhA48i8Si7s,10253
88
88
  nv_ingest_api/internal/schemas/meta/metadata_schema.py,sha256=VnAzkSFat_ckI19mlwQTlFrvP6EZVCwyNl9bt51b8oU,7193
89
89
  nv_ingest_api/internal/schemas/meta/udf.py,sha256=GgzqbZOlipQgMpDhbXLqbF8xrHenj_hMNqhR_P-1ynw,779
90
90
  nv_ingest_api/internal/schemas/mutate/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
@@ -93,16 +93,16 @@ nv_ingest_api/internal/schemas/store/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQu
93
93
  nv_ingest_api/internal/schemas/store/store_embedding_schema.py,sha256=tdKeiraim9CDL9htgp4oUSCoPMoO5PrHBnlXqDyCpMw,956
94
94
  nv_ingest_api/internal/schemas/store/store_image_schema.py,sha256=p2LGij9i6sG6RYmsfdiQOiWIc2j-POjxYrNuMrp3ELU,1010
95
95
  nv_ingest_api/internal/schemas/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
96
- nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py,sha256=gRJCfEGrJXErMF_GsZDjyDR8HOrLrUNxz2BVS6LSPY8,542
96
+ nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py,sha256=fRMRwcWP-L8sfv2enNDt_W_CL0eC2i3b_1VCCtmr1K8,1188
97
97
  nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py,sha256=31ThI5fr0yyENeJeE1xMAA-pxk1QVJLwM842zMate_k,429
98
- nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py,sha256=RZCISA8CUqKiY8eJuk4uWxzo4PZ-fuYdzMO7_LYFkoM,1117
98
+ nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py,sha256=rzdhRANCqG9mOEoLargznuBwj1-MbEQUu2LDVi5vl50,1616
99
99
  nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py,sha256=D9K8tvu-tkEBQkZo7uuRzgrHdGyM3ZcNycHbHy5HV2E,791
100
100
  nv_ingest_api/internal/store/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
101
101
  nv_ingest_api/internal/store/embed_text_upload.py,sha256=maxb4FPsBvWgvlrjAPEBlRZEFdJX5NxPG-p8kUbzV7I,9898
102
102
  nv_ingest_api/internal/store/image_upload.py,sha256=GNlY4k3pfcHv3lzXxkbmGLeHFsf9PI25bkBn6Xn9h3I,9654
103
103
  nv_ingest_api/internal/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
104
104
  nv_ingest_api/internal/transform/caption_image.py,sha256=0ILCG2F8ESqKtZiPUM-6F1BHUflFZ76Dzi2GNzkE-lU,8517
105
- nv_ingest_api/internal/transform/embed_text.py,sha256=AdUXVosBHuZadaFLi9_RVB7vB_hjTziCLUeBZ1oWGZo,20003
105
+ nv_ingest_api/internal/transform/embed_text.py,sha256=LB_2Zvw6plc7uOWT2QN13aDu2qFumXzl_RB3ZcZbLGs,20191
106
106
  nv_ingest_api/internal/transform/split_text.py,sha256=LAtInGVuydH43UwjNMQWFVC1A6NdhXP_dZup2xX4qEo,7745
107
107
  nv_ingest_api/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
108
108
  nv_ingest_api/util/control_message/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -135,6 +135,7 @@ nv_ingest_api/util/introspection/class_inspect.py,sha256=sEYe37ICHdhXxSbD0JTCDg3
135
135
  nv_ingest_api/util/introspection/function_inspect.py,sha256=_yCUUT1x3tLCsbXmUpTv_O8Qi6af4NiCpJ1qGfcksvw,2066
136
136
  nv_ingest_api/util/logging/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
137
137
  nv_ingest_api/util/logging/configuration.py,sha256=05KR3LOS-PCqU-Io__iiKG_Ds730eKxciklFfNeId3w,3126
138
+ nv_ingest_api/util/logging/sanitize.py,sha256=-dIbmvLTevrTRd18QKUQQMV4hBk6pStWP_7_VtDDctg,2584
138
139
  nv_ingest_api/util/message_brokers/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
139
140
  nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py,sha256=WaQ3CWIpIKWEivT5kL-bkmzcSQKLGFNFHdXHUJjqZFs,325
140
141
  nv_ingest_api/util/message_brokers/simple_message_broker/broker.py,sha256=PekxaxVcAa9k1wgUtozlr04SW3sAeqYJE-wdVBZf9eo,17264
@@ -161,8 +162,8 @@ nv_ingest_api/util/string_processing/configuration.py,sha256=2HS08msccuPCT0fn_jf
161
162
  nv_ingest_api/util/string_processing/yaml.py,sha256=6SW2O6wbXRhGbhETMbtXjYCZn53HeCNOP6a96AaxlHs,1454
162
163
  nv_ingest_api/util/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
163
164
  nv_ingest_api/util/system/hardware_info.py,sha256=1UFM8XE6M3pgQcpbVsCsqDQ7Dj-zzptL-XRE-DEu9UA,27213
164
- nv_ingest_api-2025.8.19.dev20250819.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
165
- nv_ingest_api-2025.8.19.dev20250819.dist-info/METADATA,sha256=xe42ZKv6dW8r2y2z5OU01FIQ0PRZF4MiK591993tbGk,13947
166
- nv_ingest_api-2025.8.19.dev20250819.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
167
- nv_ingest_api-2025.8.19.dev20250819.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
168
- nv_ingest_api-2025.8.19.dev20250819.dist-info/RECORD,,
165
+ nv_ingest_api-2025.8.21.dev20250821.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
166
+ nv_ingest_api-2025.8.21.dev20250821.dist-info/METADATA,sha256=GO4G7PAWoOa3E0NQJKagm6Ir3ph9VmiSHl4MwpsnlYw,13947
167
+ nv_ingest_api-2025.8.21.dev20250821.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
168
+ nv_ingest_api-2025.8.21.dev20250821.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
169
+ nv_ingest_api-2025.8.21.dev20250821.dist-info/RECORD,,