nv-ingest-api 2025.10.15.dev20251015__py3-none-any.whl → 2025.10.16.dev20251016__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. (The original page linked to more details here; the link target was not preserved in this text.)

@@ -758,8 +758,8 @@ def get_ocr_model_name(ocr_grpc_endpoint=None, default_model_name=DEFAULT_OCR_MO
758
758
  if ocr_model_name is not None:
759
759
  return ocr_model_name
760
760
 
761
- # 2. If no gRPC endpoint is provided, fall back to the default immediately.
762
- if not ocr_grpc_endpoint:
761
+ # 2. If no gRPC endpoint is provided or the endpoint is a NVCF endpoint, fall back to the default immediately.
762
+ if (not ocr_grpc_endpoint) or ("grpc.nvcf.nvidia.com" in ocr_grpc_endpoint):
763
763
  logger.debug(f"No OCR gRPC endpoint provided. Falling back to default model name '{default_model_name}'.")
764
764
  return default_model_name
765
765
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.10.15.dev20251015
3
+ Version: 2025.10.16.dev20251016
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -58,7 +58,7 @@ nv_ingest_api/internal/primitives/nim/model_interface/decorators.py,sha256=qwubk
58
58
  nv_ingest_api/internal/primitives/nim/model_interface/deplot.py,sha256=TvKdk6PTuI1WNhRmNNrvygaI_DIutkJkDL-XdtLZQac,10787
59
59
  nv_ingest_api/internal/primitives/nim/model_interface/helpers.py,sha256=jqbEbavvr9giODpzsGQSRDu5yZ4YfNfKAQfqUm9yUDI,11698
60
60
  nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py,sha256=WysjDZeegclO3mZgVcGOwzWbr8wSI4pWRiYD4iC2EXo,7098
61
- nv_ingest_api/internal/primitives/nim/model_interface/ocr.py,sha256=jFPB0h2SAq4RrZqiHdh28bNHvrkm0RWKZEx-ei-ahlU,29521
61
+ nv_ingest_api/internal/primitives/nim/model_interface/ocr.py,sha256=WuX-veTC510TbvMWYGYP6WCzjYCbUBAUc5ovJUWCrFU,29607
62
62
  nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py,sha256=3XXJkeJaVFe_iIfNn_bDYn79JN20besjZHiNZ5dEnZQ,12778
63
63
  nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py,sha256=lFhppNqrq5X_fzbCWKphvZQMzaJd3gHrkWsyJORzFrU,5010
64
64
  nv_ingest_api/internal/primitives/nim/model_interface/vlm.py,sha256=sUDKKlVqKjiHEGr2D04I7S4pDfnLR8b-NplV1pf5GVQ,6240
@@ -164,10 +164,10 @@ nv_ingest_api/util/string_processing/configuration.py,sha256=2HS08msccuPCT0fn_jf
164
164
  nv_ingest_api/util/string_processing/yaml.py,sha256=4Zdmc4474lUZn6kznqaNTlQJwsmRnnJQZ-DvAWLu-zo,2678
165
165
  nv_ingest_api/util/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
166
166
  nv_ingest_api/util/system/hardware_info.py,sha256=1UFM8XE6M3pgQcpbVsCsqDQ7Dj-zzptL-XRE-DEu9UA,27213
167
- nv_ingest_api-2025.10.15.dev20251015.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
167
+ nv_ingest_api-2025.10.16.dev20251016.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
168
168
  udfs/__init__.py,sha256=pXFqPgXIUqHDfj7SAR1Q19tt8KwGv_iMvhHyziz4AYM,205
169
- udfs/llm_summarizer_udf.py,sha256=t_ZFoz0e03uECYcRw4IabRj0GBlwAoJkJn13NL2wbsI,7217
170
- nv_ingest_api-2025.10.15.dev20251015.dist-info/METADATA,sha256=C3OVAyEQD5iSPzluBsDanslOq8aNRojyt3QDA8yXwGQ,14086
171
- nv_ingest_api-2025.10.15.dev20251015.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
172
- nv_ingest_api-2025.10.15.dev20251015.dist-info/top_level.txt,sha256=I1lseG9FF0CH93SPx4kFblsxFuv190cfzaas_CLNIiw,19
173
- nv_ingest_api-2025.10.15.dev20251015.dist-info/RECORD,,
169
+ udfs/llm_summarizer_udf.py,sha256=9kM3W08FVnNHIRrDDZEuQRnPJmOdU2N-YTcnaBy8NdU,7267
170
+ nv_ingest_api-2025.10.16.dev20251016.dist-info/METADATA,sha256=fsVHTpz4y-WxO1gDJOu87u8fSfAxOFrabEkcYf-Wvg8,14086
171
+ nv_ingest_api-2025.10.16.dev20251016.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
172
+ nv_ingest_api-2025.10.16.dev20251016.dist-info/top_level.txt,sha256=I1lseG9FF0CH93SPx4kFblsxFuv190cfzaas_CLNIiw,19
173
+ nv_ingest_api-2025.10.16.dev20251016.dist-info/RECORD,,
@@ -2,8 +2,9 @@
2
2
  """
3
3
  LLM Content Summarizer UDF for NV-Ingest Pipeline
4
4
 
5
- This UDF uses an LLM to generate concise summaries of text content chunks. These summaries are added to the metadata
6
- for enhanced downstream processing and search capabilities.
5
+ Generates document summaries using NVIDIA-hosted LLMs. This production UDF demonstrates how to extract the pipeline
6
+ payload, run custom code (summarization), and inject results into the metadata for downstream usecases (such as
7
+ retrieval).
7
8
 
8
9
  These variables can be set in the environment before running the pipeline. These can be treated as kwargs.
9
10
  - NVIDIA_API_KEY: API key for NVIDIA NIM endpoints (required)
@@ -14,16 +15,14 @@ These variables can be set in the environment before running the pipeline. These
14
15
  - MAX_CONTENT_LENGTH: Maximum content length to send to API (default: 12000)
15
16
  TODO: Implement this
16
17
  - NUM_CHUNKS: (Optional) Number of first and last pages to summarize. default=1
18
+
19
+ More info can be found in `examples/udfs/README.md`
17
20
  """
18
21
 
19
22
  import logging
20
23
  import os
21
24
  import time
22
25
 
23
- # REMOVE BEFORE MERGING
24
- # import yaml
25
- # from pathlib import Path
26
-
27
26
 
28
27
  logger = logging.getLogger(__name__)
29
28