nv-ingest-client 2025.8.19.dev20250819__tar.gz → 2025.8.21.dev20250821__tar.gz

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-client might be problematic. Click here for more details.

Files changed (59)
  1. {nv_ingest_client-2025.8.19.dev20250819/src/nv_ingest_client.egg-info → nv_ingest_client-2025.8.21.dev20250821}/PKG-INFO +1 -1
  2. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/nv_ingest_cli.py +14 -2
  3. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/util/dataset.py +8 -2
  4. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/util/vdb/milvus.py +6 -1
  5. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821/src/nv_ingest_client.egg-info}/PKG-INFO +1 -1
  6. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/LICENSE +0 -0
  7. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/MANIFEST.in +0 -0
  8. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/README.md +0 -0
  9. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/pyproject.toml +0 -0
  10. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/setup.cfg +0 -0
  11. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/__init__.py +0 -0
  12. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/cli/__init__.py +0 -0
  13. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/cli/util/__init__.py +0 -0
  14. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/cli/util/click.py +0 -0
  15. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/cli/util/processing.py +0 -0
  16. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/cli/util/system.py +0 -0
  17. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/client/__init__.py +0 -0
  18. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/client/client.py +0 -0
  19. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/client/interface.py +0 -0
  20. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/client/util/processing.py +0 -0
  21. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/__init__.py +0 -0
  22. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/jobs/__init__.py +0 -0
  23. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/jobs/job_spec.py +0 -0
  24. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/jobs/job_state.py +0 -0
  25. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/tasks/__init__.py +0 -0
  26. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/tasks/audio_extraction.py +0 -0
  27. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/tasks/caption.py +1 -1
  28. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/tasks/chart_extraction.py +0 -0
  29. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/tasks/dedup.py +0 -0
  30. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/tasks/embed.py +0 -0
  31. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/tasks/extract.py +0 -0
  32. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/tasks/filter.py +0 -0
  33. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/tasks/infographic_extraction.py +0 -0
  34. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/tasks/split.py +0 -0
  35. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/tasks/store.py +0 -0
  36. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/tasks/table_extraction.py +0 -0
  37. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/tasks/task_base.py +0 -0
  38. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/tasks/task_factory.py +0 -0
  39. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/tasks/udf.py +0 -0
  40. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/primitives/tasks/vdb_upload.py +0 -0
  41. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/util/__init__.py +0 -0
  42. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/util/file_processing/__init__.py +0 -0
  43. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/util/file_processing/extract.py +0 -0
  44. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/util/milvus.py +0 -0
  45. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/util/process_json_files.py +0 -0
  46. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/util/processing.py +0 -0
  47. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/util/system.py +0 -0
  48. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/util/transport.py +0 -0
  49. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/util/util.py +0 -0
  50. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/util/vdb/__init__.py +0 -0
  51. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/util/vdb/adt_vdb.py +0 -0
  52. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/util/vdb/opensearch.py +0 -0
  53. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client/util/zipkin.py +0 -0
  54. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client.egg-info/SOURCES.txt +0 -0
  55. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client.egg-info/dependency_links.txt +0 -0
  56. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client.egg-info/entry_points.txt +0 -0
  57. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client.egg-info/requires.txt +0 -0
  58. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/nv_ingest_client.egg-info/top_level.txt +0 -0
  59. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.21.dev20250821}/src/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.8.19.dev20250819
3
+ Version: 2025.8.21.dev20250821
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -32,6 +32,7 @@ from nv_ingest_client.client import NvIngestClient
32
32
  from nv_ingest_client.util.dataset import get_dataset_files
33
33
  from nv_ingest_client.util.dataset import get_dataset_statistics
34
34
  from nv_ingest_client.util.system import ensure_directory_with_permissions
35
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
35
36
 
36
37
  try:
37
38
  NV_INGEST_VERSION = version("nv_ingest")
@@ -237,7 +238,9 @@ def main(
237
238
 
238
239
  try:
239
240
  configure_logging(logger, log_level)
240
- logging.debug(f"nv-ingest-cli:params:\n{json.dumps(ctx.params, indent=2, default=repr)}")
241
+ # Sanitize CLI params before logging to avoid leaking secrets
242
+ _sanitized_params = sanitize_for_logging(dict(ctx.params))
243
+ logging.debug(f"nv-ingest-cli:params:\n{json.dumps(_sanitized_params, indent=2, default=repr)}")
241
244
 
242
245
  docs = list(doc)
243
246
  if dataset:
@@ -260,7 +263,16 @@ def main(
260
263
  logger.info(_msg)
261
264
 
262
265
  if not dry_run:
263
- logging.debug(f"Creating message client: {client_host} and port: {client_port} -> {client_kwargs}")
266
+ # Sanitize client kwargs (JSON string) before logging
267
+ try:
268
+ _client_kwargs_obj = json.loads(client_kwargs)
269
+ except Exception:
270
+ _client_kwargs_obj = {"raw": client_kwargs}
271
+ _sanitized_client_kwargs = sanitize_for_logging(_client_kwargs_obj)
272
+ logging.debug(
273
+ f"Creating message client: {client_host} and port: {client_port} -> "
274
+ f"{json.dumps(_sanitized_client_kwargs, indent=2, default=repr)}"
275
+ )
264
276
 
265
277
  if client_type == "rest":
266
278
  client_allocator = RestClient
@@ -94,8 +94,14 @@ def get_dataset_files(dataset_bytes: BytesIO, shuffle: bool = False) -> list:
94
94
  dataset_bytes.seek(0)
95
95
  dataset = json.load(dataset_bytes)
96
96
  sampled_files = dataset.get("sampled_files", [])
97
- if shuffle:
98
- random.shuffle(sampled_files)
97
+ if shuffle and len(sampled_files) > 1:
98
+ original = list(sampled_files)
99
+ # Create a shuffled copy without mutating the original list
100
+ shuffled = random.sample(sampled_files, k=len(sampled_files))
101
+ # Guard against seeded RNG or accidental identity by forcing a different order
102
+ if shuffled == original:
103
+ shuffled = shuffled[1:] + shuffled[:1]
104
+ return shuffled
99
105
  return sampled_files
100
106
  except json.JSONDecodeError as err:
101
107
  raise ValueError(f"{err}")
@@ -170,10 +170,15 @@ def grab_meta_collection_info(
170
170
  embedding_model: str = None,
171
171
  embedding_dim: int = None,
172
172
  client: MilvusClient = None,
173
+ milvus_uri: str = None,
174
+ username: str = None,
175
+ password: str = None,
173
176
  ):
174
177
  timestamp = timestamp or ""
175
178
  embedding_model = embedding_model or ""
176
179
  embedding_dim = embedding_dim or ""
180
+ if milvus_uri:
181
+ client = MilvusClient(milvus_uri, token=f"{username}:{password}")
177
182
  results = client.query_iterator(
178
183
  collection_name=meta_collection_name,
179
184
  output_fields=[
@@ -771,7 +776,7 @@ def bulk_insert_milvus(
771
776
  """
772
777
  minio_client = Minio(minio_endpoint, access_key=access_key, secret_key=secret_key, secure=False)
773
778
 
774
- connections.connect(uri=milvus_uri, username=username, password=password)
779
+ connections.connect(uri=milvus_uri, token=f"{username}:{password}")
775
780
  t_bulk_start = time.time()
776
781
  task_ids = []
777
782
  uploaded_files = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.8.19.dev20250819
3
+ Version: 2025.8.21.dev20250821
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -9,8 +9,8 @@
9
9
  import logging
10
10
  from typing import Dict
11
11
 
12
- from nv_ingest_api.internal.schemas.meta.ingest_job_schema import IngestTaskCaptionSchema
13
12
 
13
+ from nv_ingest_api.internal.schemas.meta.ingest_job_schema import IngestTaskCaptionSchema
14
14
  from .task_base import Task
15
15
 
16
16
  logger = logging.getLogger(__name__)