nv-ingest-api 2025.10.16.dev20251016__py3-none-any.whl → 2025.10.18.dev20251018__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. Click here for more details.
- {nv_ingest_api-2025.10.16.dev20251016.dist-info → nv_ingest_api-2025.10.18.dev20251018.dist-info}/METADATA +1 -1
- {nv_ingest_api-2025.10.16.dev20251016.dist-info → nv_ingest_api-2025.10.18.dev20251018.dist-info}/RECORD +6 -6
- udfs/llm_summarizer_udf.py +4 -2
- {nv_ingest_api-2025.10.16.dev20251016.dist-info → nv_ingest_api-2025.10.18.dev20251018.dist-info}/WHEEL +0 -0
- {nv_ingest_api-2025.10.16.dev20251016.dist-info → nv_ingest_api-2025.10.18.dev20251018.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-2025.10.16.dev20251016.dist-info → nv_ingest_api-2025.10.18.dev20251018.dist-info}/top_level.txt +0 -0
|
@@ -164,10 +164,10 @@ nv_ingest_api/util/string_processing/configuration.py,sha256=2HS08msccuPCT0fn_jf
|
|
|
164
164
|
nv_ingest_api/util/string_processing/yaml.py,sha256=4Zdmc4474lUZn6kznqaNTlQJwsmRnnJQZ-DvAWLu-zo,2678
|
|
165
165
|
nv_ingest_api/util/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
166
166
|
nv_ingest_api/util/system/hardware_info.py,sha256=1UFM8XE6M3pgQcpbVsCsqDQ7Dj-zzptL-XRE-DEu9UA,27213
|
|
167
|
-
nv_ingest_api-2025.10.
|
|
167
|
+
nv_ingest_api-2025.10.18.dev20251018.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
168
168
|
udfs/__init__.py,sha256=pXFqPgXIUqHDfj7SAR1Q19tt8KwGv_iMvhHyziz4AYM,205
|
|
169
|
-
udfs/llm_summarizer_udf.py,sha256=
|
|
170
|
-
nv_ingest_api-2025.10.
|
|
171
|
-
nv_ingest_api-2025.10.
|
|
172
|
-
nv_ingest_api-2025.10.
|
|
173
|
-
nv_ingest_api-2025.10.
|
|
169
|
+
udfs/llm_summarizer_udf.py,sha256=lH5c5NHoT-5ecHC3og_40u1Ujta8SpsKU4X0e4wzbMU,7314
|
|
170
|
+
nv_ingest_api-2025.10.18.dev20251018.dist-info/METADATA,sha256=Rre95YRp0exYbe_sDuMORUrRaNmipBDQLf0eLZ5gb1Q,14086
|
|
171
|
+
nv_ingest_api-2025.10.18.dev20251018.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
172
|
+
nv_ingest_api-2025.10.18.dev20251018.dist-info/top_level.txt,sha256=I1lseG9FF0CH93SPx4kFblsxFuv190cfzaas_CLNIiw,19
|
|
173
|
+
nv_ingest_api-2025.10.18.dev20251018.dist-info/RECORD,,
|
udfs/llm_summarizer_udf.py
CHANGED
|
@@ -84,6 +84,7 @@ def content_summarizer(control_message: "IngestControlMessage") -> "IngestContro
|
|
|
84
84
|
# According to docs/docs/extraction/user_defined_functions.md#understanding-the-dataframe-payload
|
|
85
85
|
# the rows are not necessarily pages. they are chunks of data extracted from the document. in order to select
|
|
86
86
|
# pages, it must require parsing the payload to see which chunks correspond to which pages
|
|
87
|
+
original_df = df.copy()
|
|
87
88
|
if len(df) > 1:
|
|
88
89
|
# TODO: add feature to select N first and last chunks
|
|
89
90
|
df = df.iloc[[0, -1]]
|
|
@@ -107,10 +108,11 @@ def content_summarizer(control_message: "IngestControlMessage") -> "IngestContro
|
|
|
107
108
|
if not stats["failed"]:
|
|
108
109
|
stats["tokens"] = _estimate_tokens(content)
|
|
109
110
|
logger.info("Summarized %d tokens in %f seconds using %s", stats["tokens"], stats["duration"], model_name)
|
|
110
|
-
_store_summary(
|
|
111
|
+
_store_summary(original_df, summary, model_name)
|
|
111
112
|
|
|
112
113
|
# Update the control message with modified DataFrame
|
|
113
|
-
control_message.payload(
|
|
114
|
+
control_message.payload(original_df)
|
|
115
|
+
|
|
114
116
|
else:
|
|
115
117
|
logger.warning("%s failed to summarize content", model_name)
|
|
116
118
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|