nv-ingest-client 2025.8.19.dev20250819__tar.gz → 2025.8.20.dev20250820__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-client might be problematic. Click here for more details.

Files changed (59)
  1. {nv_ingest_client-2025.8.19.dev20250819/src/nv_ingest_client.egg-info → nv_ingest_client-2025.8.20.dev20250820}/PKG-INFO +1 -1
  2. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/dataset.py +8 -2
  3. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/vdb/milvus.py +6 -1
  4. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820/src/nv_ingest_client.egg-info}/PKG-INFO +1 -1
  5. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/LICENSE +0 -0
  6. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/MANIFEST.in +0 -0
  7. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/README.md +0 -0
  8. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/pyproject.toml +0 -0
  9. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/setup.cfg +0 -0
  10. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/__init__.py +0 -0
  11. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/cli/__init__.py +0 -0
  12. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/cli/util/__init__.py +0 -0
  13. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/cli/util/click.py +0 -0
  14. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/cli/util/processing.py +0 -0
  15. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/cli/util/system.py +0 -0
  16. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/client/__init__.py +0 -0
  17. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/client/client.py +0 -0
  18. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/client/interface.py +0 -0
  19. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/client/util/processing.py +0 -0
  20. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/nv_ingest_cli.py +0 -0
  21. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/__init__.py +0 -0
  22. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/jobs/__init__.py +0 -0
  23. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/jobs/job_spec.py +0 -0
  24. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/jobs/job_state.py +0 -0
  25. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/__init__.py +0 -0
  26. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/audio_extraction.py +0 -0
  27. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/caption.py +0 -0
  28. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/chart_extraction.py +0 -0
  29. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/dedup.py +0 -0
  30. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/embed.py +0 -0
  31. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/extract.py +0 -0
  32. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/filter.py +0 -0
  33. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/infographic_extraction.py +0 -0
  34. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/split.py +0 -0
  35. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/store.py +0 -0
  36. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/table_extraction.py +0 -0
  37. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/task_base.py +0 -0
  38. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/task_factory.py +0 -0
  39. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/udf.py +0 -0
  40. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/vdb_upload.py +0 -0
  41. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/__init__.py +0 -0
  42. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/file_processing/__init__.py +0 -0
  43. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/file_processing/extract.py +0 -0
  44. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/milvus.py +0 -0
  45. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/process_json_files.py +0 -0
  46. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/processing.py +0 -0
  47. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/system.py +0 -0
  48. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/transport.py +0 -0
  49. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/util.py +0 -0
  50. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/vdb/__init__.py +0 -0
  51. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/vdb/adt_vdb.py +0 -0
  52. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/vdb/opensearch.py +0 -0
  53. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/zipkin.py +0 -0
  54. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client.egg-info/SOURCES.txt +0 -0
  55. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client.egg-info/dependency_links.txt +0 -0
  56. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client.egg-info/entry_points.txt +0 -0
  57. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client.egg-info/requires.txt +0 -0
  58. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client.egg-info/top_level.txt +0 -0
  59. {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.8.19.dev20250819
3
+ Version: 2025.8.20.dev20250820
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -94,8 +94,14 @@ def get_dataset_files(dataset_bytes: BytesIO, shuffle: bool = False) -> list:
94
94
  dataset_bytes.seek(0)
95
95
  dataset = json.load(dataset_bytes)
96
96
  sampled_files = dataset.get("sampled_files", [])
97
- if shuffle:
98
- random.shuffle(sampled_files)
97
+ if shuffle and len(sampled_files) > 1:
98
+ original = list(sampled_files)
99
+ # Create a shuffled copy without mutating the original list
100
+ shuffled = random.sample(sampled_files, k=len(sampled_files))
101
+ # Guard against seeded RNG or accidental identity by forcing a different order
102
+ if shuffled == original:
103
+ shuffled = shuffled[1:] + shuffled[:1]
104
+ return shuffled
99
105
  return sampled_files
100
106
  except json.JSONDecodeError as err:
101
107
  raise ValueError(f"{err}")
@@ -170,10 +170,15 @@ def grab_meta_collection_info(
170
170
  embedding_model: str = None,
171
171
  embedding_dim: int = None,
172
172
  client: MilvusClient = None,
173
+ milvus_uri: str = None,
174
+ username: str = None,
175
+ password: str = None,
173
176
  ):
174
177
  timestamp = timestamp or ""
175
178
  embedding_model = embedding_model or ""
176
179
  embedding_dim = embedding_dim or ""
180
+ if milvus_uri:
181
+ client = MilvusClient(milvus_uri, token=f"{username}:{password}")
177
182
  results = client.query_iterator(
178
183
  collection_name=meta_collection_name,
179
184
  output_fields=[
@@ -771,7 +776,7 @@ def bulk_insert_milvus(
771
776
  """
772
777
  minio_client = Minio(minio_endpoint, access_key=access_key, secret_key=secret_key, secure=False)
773
778
 
774
- connections.connect(uri=milvus_uri, username=username, password=password)
779
+ connections.connect(uri=milvus_uri, token=f"{username}:{password}")
775
780
  t_bulk_start = time.time()
776
781
  task_ids = []
777
782
  uploaded_files = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.8.19.dev20250819
3
+ Version: 2025.8.20.dev20250820
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License