nv-ingest-api 2025.6.16.dev20250616__py3-none-any.whl → 2025.6.24.dev20250625__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of nv-ingest-api has been flagged as potentially problematic; see the source registry page for details.

nv_ingest_api/internal/transform/split_text.py

@@ -31,9 +31,16 @@ def _build_split_documents(row, chunks: List[str]) -> List[dict[str, Any]]:
         metadata = row.metadata if hasattr(row, "metadata") and isinstance(row.metadata, dict) else {}
         metadata = copy.deepcopy(metadata)
 
-        metadata["content"] = text
-
-        documents.append({"document_type": ContentTypeEnum.TEXT.value, "metadata": metadata, "uuid": str(uuid.uuid4())})
+        if row.document_type == ContentTypeEnum.AUDIO:
+            metadata["audio_metadata"]["audio_transcript"] = text
+            documents.append(
+                {"document_type": ContentTypeEnum.AUDIO.value, "metadata": metadata, "uuid": str(uuid.uuid4())}
+            )
+        else:
+            metadata["content"] = text
+            documents.append(
+                {"document_type": ContentTypeEnum.TEXT.value, "metadata": metadata, "uuid": str(uuid.uuid4())}
+            )
 
     return documents
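For context, this first hunk makes the chunk rebuilder type-aware: when the source row is audio, each chunk is written back into metadata["audio_metadata"]["audio_transcript"] and the emitted record keeps the AUDIO document type; every other row keeps the previous behavior of writing the chunk into metadata["content"] as TEXT. Below is a minimal, self-contained sketch of that branch; the ContentTypeEnum stand-in, the SimpleNamespace row, and the sample values are assumptions for illustration and not the package's actual objects.

# Hypothetical, self-contained sketch of the new per-type branch in
# _build_split_documents; the real ContentTypeEnum and ledger rows live in
# nv_ingest_api and may differ in detail.
import copy
import uuid
from enum import Enum
from types import SimpleNamespace


class ContentTypeEnum(str, Enum):  # stand-in for the package's enum
    TEXT = "text"
    AUDIO = "audio"


def build_split_documents(row, chunks):
    documents = []
    for text in chunks:
        metadata = row.metadata if hasattr(row, "metadata") and isinstance(row.metadata, dict) else {}
        metadata = copy.deepcopy(metadata)
        if row.document_type == ContentTypeEnum.AUDIO:
            # Audio rows: the chunk becomes the transcript of the emitted record.
            metadata["audio_metadata"]["audio_transcript"] = text
            doc_type = ContentTypeEnum.AUDIO.value
        else:
            # Everything else keeps the pre-existing behavior.
            metadata["content"] = text
            doc_type = ContentTypeEnum.TEXT.value
        documents.append({"document_type": doc_type, "metadata": metadata, "uuid": str(uuid.uuid4())})
    return documents


audio_row = SimpleNamespace(
    document_type=ContentTypeEnum.AUDIO,
    metadata={"audio_metadata": {"audio_transcript": "full transcript ..."}},
)
print(build_split_documents(audio_row, ["chunk one", "chunk two"]))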
 
@@ -118,7 +125,7 @@ def transform_text_split_and_tokenize_internal(
     )
 
     # Filter to documents with text content.
-    text_type_condition = df_transform_ledger["document_type"] == ContentTypeEnum.TEXT
+    text_type_condition = df_transform_ledger["document_type"].isin([ContentTypeEnum.TEXT, ContentTypeEnum.AUDIO])
 
     normalized_meta_df = pd.json_normalize(df_transform_ledger["metadata"], errors="ignore")
     if "source_metadata.source_type" in normalized_meta_df.columns:
@@ -147,7 +154,14 @@ def transform_text_split_and_tokenize_internal(
 
     split_docs: List[Dict[str, Any]] = []
     for _, row in df_filtered.iterrows():
-        content: str = row["metadata"]["content"] if row["metadata"]["content"] is not None else ""
+        if row["document_type"] == ContentTypeEnum.AUDIO:
+            content: str = (
+                row["metadata"]["audio_metadata"]["audio_transcript"]
+                if row["metadata"]["audio_metadata"]["audio_transcript"] is not None
+                else ""
+            )
+        else:
+            content: str = row["metadata"]["content"] if row["metadata"]["content"] is not None else ""
         chunks: List[str] = _split_into_chunks(content, tokenizer_model, chunk_size, chunk_overlap)
         split_docs.extend(_build_split_documents(row, chunks))
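The remaining two hunks widen the splitter's input selection and tell it where to find splittable text for audio rows: the ledger filter now admits both TEXT and AUDIO rows via pandas .isin, and the per-row lookup reads the transcript (falling back to an empty string when it is None) instead of metadata["content"]. The toy walkthrough below illustrates those two steps on a three-row frame; the IMAGE enum member and the sample data are assumptions added purely to show a row being filtered out.

# Toy illustration of the widened filter and the per-type content lookup;
# column names mirror the diff, everything else is made up for the example.
import pandas as pd
from enum import Enum


class ContentTypeEnum(str, Enum):  # stand-in; IMAGE exists only to show a filtered-out row
    TEXT = "text"
    AUDIO = "audio"
    IMAGE = "image"


df_transform_ledger = pd.DataFrame(
    {
        "document_type": [ContentTypeEnum.TEXT, ContentTypeEnum.AUDIO, ContentTypeEnum.IMAGE],
        "metadata": [
            {"content": "plain text body"},
            {"audio_metadata": {"audio_transcript": "spoken words ..."}},
            {"content": None},
        ],
    }
)

# The filter now keeps audio rows alongside text rows.
text_type_condition = df_transform_ledger["document_type"].isin([ContentTypeEnum.TEXT, ContentTypeEnum.AUDIO])
df_filtered = df_transform_ledger[text_type_condition]

# Audio rows read their splittable text from the transcript field, with an
# empty-string fallback when the transcript is missing.
for _, row in df_filtered.iterrows():
    if row["document_type"] == ContentTypeEnum.AUDIO:
        transcript = row["metadata"]["audio_metadata"]["audio_transcript"]
        content = transcript if transcript is not None else ""
    else:
        content = row["metadata"]["content"] if row["metadata"]["content"] is not None else ""
    print(row["document_type"].value, "->", content)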
 
nv_ingest_api-2025.6.24.dev20250625.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nv-ingest-api
-Version: 2025.6.16.dev20250616
+Version: 2025.6.24.dev20250625
 Summary: Python module with core document ingestion functions.
 Author-email: Jeremy Dyer <jdyer@nvidia.com>
 License: Apache License
nv_ingest_api-2025.6.24.dev20250625.dist-info/RECORD

@@ -100,7 +100,7 @@ nv_ingest_api/internal/store/image_upload.py,sha256=GNlY4k3pfcHv3lzXxkbmGLeHFsf9
 nv_ingest_api/internal/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest_api/internal/transform/caption_image.py,sha256=0ILCG2F8ESqKtZiPUM-6F1BHUflFZ76Dzi2GNzkE-lU,8517
 nv_ingest_api/internal/transform/embed_text.py,sha256=F8kg-WXihtuUMwDQUUYjnfGDCdQp1Mkd-jeThOiJT0s,16507
-nv_ingest_api/internal/transform/split_text.py,sha256=DlVoyHLqZ-6_FiWwZmofPcq7TX8Ta23hIE0St9tw1IY,6822
+nv_ingest_api/internal/transform/split_text.py,sha256=-kwpRWSVZrPldm1hn3-tVz_TkzuKM-kPvNU3HTp9zOY,7476
 nv_ingest_api/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest_api/util/control_message/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nv_ingest_api/util/control_message/validators.py,sha256=KvvbyheJ5rbzvJbH9JKpMR9VfoI0b0uM6eTAZte1p44,1315
@@ -150,8 +150,8 @@ nv_ingest_api/util/service_clients/rest/rest_client.py,sha256=dZ-jrk7IK7oNtHoXFS
 nv_ingest_api/util/string_processing/__init__.py,sha256=mkwHthyS-IILcLcL1tJYeF6mpqX3pxEw5aUzDGjTSeU,1411
 nv_ingest_api/util/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nv_ingest_api/util/system/hardware_info.py,sha256=ORZeKpH9kSGU_vuPhyBwkIiMyCViKUX2CP__MCjrfbU,19463
-nv_ingest_api-2025.6.16.dev20250616.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-nv_ingest_api-2025.6.16.dev20250616.dist-info/METADATA,sha256=nrAshcKmKQTsbPtJTNzrdu37KttrTjLkAhUybCMLrV8,13919
-nv_ingest_api-2025.6.16.dev20250616.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-nv_ingest_api-2025.6.16.dev20250616.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
-nv_ingest_api-2025.6.16.dev20250616.dist-info/RECORD,,
+nv_ingest_api-2025.6.24.dev20250625.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+nv_ingest_api-2025.6.24.dev20250625.dist-info/METADATA,sha256=jnzs5A1QthJjwPAHRU2Z4PmVEAgddnZMiXNX5W-sMjI,13919
+nv_ingest_api-2025.6.24.dev20250625.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+nv_ingest_api-2025.6.24.dev20250625.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
+nv_ingest_api-2025.6.24.dev20250625.dist-info/RECORD,,