nv-ingest-api 2025.10.16.dev20251016__py3-none-any.whl → 2025.10.17.dev20251017__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.10.16.dev20251016
3
+ Version: 2025.10.17.dev20251017
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -164,10 +164,10 @@ nv_ingest_api/util/string_processing/configuration.py,sha256=2HS08msccuPCT0fn_jf
164
164
  nv_ingest_api/util/string_processing/yaml.py,sha256=4Zdmc4474lUZn6kznqaNTlQJwsmRnnJQZ-DvAWLu-zo,2678
165
165
  nv_ingest_api/util/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
166
166
  nv_ingest_api/util/system/hardware_info.py,sha256=1UFM8XE6M3pgQcpbVsCsqDQ7Dj-zzptL-XRE-DEu9UA,27213
167
- nv_ingest_api-2025.10.16.dev20251016.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
167
+ nv_ingest_api-2025.10.17.dev20251017.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
168
168
  udfs/__init__.py,sha256=pXFqPgXIUqHDfj7SAR1Q19tt8KwGv_iMvhHyziz4AYM,205
169
- udfs/llm_summarizer_udf.py,sha256=9kM3W08FVnNHIRrDDZEuQRnPJmOdU2N-YTcnaBy8NdU,7267
170
- nv_ingest_api-2025.10.16.dev20251016.dist-info/METADATA,sha256=fsVHTpz4y-WxO1gDJOu87u8fSfAxOFrabEkcYf-Wvg8,14086
171
- nv_ingest_api-2025.10.16.dev20251016.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
172
- nv_ingest_api-2025.10.16.dev20251016.dist-info/top_level.txt,sha256=I1lseG9FF0CH93SPx4kFblsxFuv190cfzaas_CLNIiw,19
173
- nv_ingest_api-2025.10.16.dev20251016.dist-info/RECORD,,
169
+ udfs/llm_summarizer_udf.py,sha256=lH5c5NHoT-5ecHC3og_40u1Ujta8SpsKU4X0e4wzbMU,7314
170
+ nv_ingest_api-2025.10.17.dev20251017.dist-info/METADATA,sha256=Rbhi8IKrU7imXMtP7ejIy67EWDbnmdtwa6xLbr9t8pM,14086
171
+ nv_ingest_api-2025.10.17.dev20251017.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
172
+ nv_ingest_api-2025.10.17.dev20251017.dist-info/top_level.txt,sha256=I1lseG9FF0CH93SPx4kFblsxFuv190cfzaas_CLNIiw,19
173
+ nv_ingest_api-2025.10.17.dev20251017.dist-info/RECORD,,
@@ -84,6 +84,7 @@ def content_summarizer(control_message: "IngestControlMessage") -> "IngestContro
84
84
  # According to docs/docs/extraction/user_defined_functions.md#understanding-the-dataframe-payload
85
85
  # the rows are not necessarily pages. they are chunks of data extracted from the document. in order to select
86
86
  # pages, it must require parsing the payload to see which chunks correspond to which pages
87
+ original_df = df.copy()
87
88
  if len(df) > 1:
88
89
  # TODO: add feature to select N first and last chunks
89
90
  df = df.iloc[[0, -1]]
@@ -107,10 +108,11 @@ def content_summarizer(control_message: "IngestControlMessage") -> "IngestContro
107
108
  if not stats["failed"]:
108
109
  stats["tokens"] = _estimate_tokens(content)
109
110
  logger.info("Summarized %d tokens in %f seconds using %s", stats["tokens"], stats["duration"], model_name)
110
- _store_summary(df, summary, model_name)
111
+ _store_summary(original_df, summary, model_name)
111
112
 
112
113
  # Update the control message with modified DataFrame
113
- control_message.payload(df)
114
+ control_message.payload(original_df)
115
+
114
116
  else:
115
117
  logger.warning("%s failed to summarize content", model_name)
116
118