nv-ingest-client 2025.12.14.dev20251214__tar.gz → 2025.12.24.dev20251224__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. {nv_ingest_client-2025.12.14.dev20251214/src/nv_ingest_client.egg-info → nv_ingest_client-2025.12.24.dev20251224}/PKG-INFO +1 -1
  2. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/client/client.py +7 -7
  3. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/tasks/extract.py +1 -0
  4. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/vdb/milvus.py +2 -1
  5. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224/src/nv_ingest_client.egg-info}/PKG-INFO +1 -1
  6. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/LICENSE +0 -0
  7. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/MANIFEST.in +0 -0
  8. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/README.md +0 -0
  9. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/pyproject.toml +0 -0
  10. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/setup.cfg +0 -0
  11. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/__init__.py +0 -0
  12. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/cli/__init__.py +0 -0
  13. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/cli/util/__init__.py +0 -0
  14. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/cli/util/click.py +0 -0
  15. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/cli/util/processing.py +0 -0
  16. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/cli/util/system.py +0 -0
  17. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/client/__init__.py +0 -0
  18. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/client/ingest_job_handler.py +0 -0
  19. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/client/interface.py +0 -0
  20. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/client/util/processing.py +0 -0
  21. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/nv_ingest_cli.py +0 -0
  22. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/__init__.py +0 -0
  23. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/jobs/__init__.py +0 -0
  24. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/jobs/job_spec.py +0 -0
  25. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/jobs/job_state.py +0 -0
  26. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/tasks/__init__.py +0 -0
  27. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/tasks/audio_extraction.py +0 -0
  28. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/tasks/caption.py +0 -0
  29. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/tasks/chart_extraction.py +0 -0
  30. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/tasks/dedup.py +0 -0
  31. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/tasks/embed.py +0 -0
  32. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/tasks/filter.py +0 -0
  33. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/tasks/infographic_extraction.py +0 -0
  34. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/tasks/ocr_extraction.py +0 -0
  35. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/tasks/split.py +0 -0
  36. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/tasks/store.py +0 -0
  37. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/tasks/table_extraction.py +0 -0
  38. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/tasks/task_base.py +0 -0
  39. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/tasks/task_factory.py +0 -0
  40. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/tasks/udf.py +0 -0
  41. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/primitives/tasks/vdb_upload.py +0 -0
  42. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/__init__.py +0 -0
  43. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/dataset.py +0 -0
  44. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/document_analysis.py +0 -0
  45. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/file_processing/__init__.py +0 -0
  46. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/file_processing/extract.py +0 -0
  47. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/image_disk_utils.py +0 -0
  48. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/milvus.py +0 -0
  49. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/process_json_files.py +0 -0
  50. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/processing.py +0 -0
  51. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/system.py +0 -0
  52. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/transport.py +0 -0
  53. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/util.py +0 -0
  54. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/vdb/__init__.py +0 -0
  55. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/vdb/adt_vdb.py +0 -0
  56. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/vdb/lancedb.py +0 -0
  57. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/vdb/opensearch.py +0 -0
  58. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client/util/zipkin.py +0 -0
  59. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client.egg-info/SOURCES.txt +0 -0
  60. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client.egg-info/dependency_links.txt +0 -0
  61. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client.egg-info/entry_points.txt +0 -0
  62. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client.egg-info/requires.txt +0 -0
  63. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/nv_ingest_client.egg-info/top_level.txt +0 -0
  64. {nv_ingest_client-2025.12.14.dev20251214 → nv_ingest_client-2025.12.24.dev20251224}/src/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.12.14.dev20251214
3
+ Version: 2025.12.24.dev20251224
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -1269,7 +1269,7 @@ class NvIngestClient:
1269
1269
  ----------
1270
1270
  batch_size : Optional[int]
1271
1271
  The batch_size value to validate. None uses value from
1272
- NV_INGEST_BATCH_SIZE environment variable or default 32.
1272
+ NV_INGEST_BATCH_SIZE environment variable or default 16.
1273
1273
 
1274
1274
  Returns
1275
1275
  -------
@@ -1279,18 +1279,18 @@ class NvIngestClient:
1279
1279
  # Handle None/default case
1280
1280
  if batch_size is None:
1281
1281
  try:
1282
- batch_size = int(os.getenv("NV_INGEST_CLIENT_BATCH_SIZE", "32"))
1282
+ batch_size = int(os.getenv("NV_INGEST_CLIENT_BATCH_SIZE", "16"))
1283
1283
  except ValueError:
1284
- batch_size = 32
1284
+ batch_size = 16
1285
1285
 
1286
1286
  # Validate type and range
1287
1287
  if not isinstance(batch_size, int):
1288
- logger.warning(f"batch_size must be an integer, got {type(batch_size).__name__}. Using default 32.")
1289
- return 32
1288
+ logger.warning(f"batch_size must be an integer, got {type(batch_size).__name__}. Using default 16.")
1289
+ return 16
1290
1290
 
1291
1291
  if batch_size < 1:
1292
- logger.warning(f"batch_size must be >= 1, got {batch_size}. Using default 32.")
1293
- return 32
1292
+ logger.warning(f"batch_size must be >= 1, got {batch_size}. Using default 16.")
1293
+ return 16
1294
1294
 
1295
1295
  # Performance guidance warnings
1296
1296
  if batch_size < 8:
@@ -61,6 +61,7 @@ _Type_Extract_Method_PDF = Literal[
61
61
  "tika",
62
62
  "unstructured_io",
63
63
  "unstructured_local",
64
+ "pdfium_hybrid",
64
65
  "ocr",
65
66
  ]
66
67
 
@@ -1049,7 +1049,8 @@ def write_to_nvingest_collection(
1049
1049
  )
1050
1050
  num_elements = len(cleaned_records)
1051
1051
  if num_elements == 0:
1052
- raise ValueError("No records with Embeddings to insert detected.")
1052
+ logger.warning("No records with Embeddings to insert detected.")
1053
+ return
1053
1054
  logger.info(f"{num_elements} elements to insert to milvus")
1054
1055
  logger.info(f"threshold for streaming is {threshold}")
1055
1056
  if num_elements < threshold:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.12.14.dev20251214
3
+ Version: 2025.12.24.dev20251224
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License