nv-ingest-api 2025.10.27.dev20251027__py3-none-any.whl → 2025.10.29.dev20251029__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

@@ -254,22 +254,29 @@ else:
254
254
  file = None
255
255
  try:
256
256
  for file in paths:
257
+ if thread_stop.is_set():
258
+ return
257
259
  if isinstance(file, tuple):
258
260
  video_file, audio_file = file
261
+ if thread_stop.is_set():
262
+ return
259
263
  with open(video_file, "rb") as f:
260
264
  video = f.read()
265
+ if thread_stop.is_set():
266
+ return
261
267
  with open(audio_file, "rb") as f:
262
268
  audio = f.read()
263
269
  queue.put((video, audio))
264
270
  else:
265
- if thread_stop:
271
+ if thread_stop.is_set():
266
272
  return
267
273
  with open(file, "rb") as f:
268
274
  queue.put(f.read())
269
275
  except Exception as e:
270
276
  logging.error(f"Error processing file {file}: {e}")
271
277
  queue.put(RuntimeError(f"Error processing file {file}: {e}"))
272
- queue.put(StopIteration)
278
+ finally:
279
+ queue.put(StopIteration)
273
280
 
274
281
  class DataLoader:
275
282
  """
@@ -290,7 +297,7 @@ else:
290
297
  ):
291
298
  interface = interface if interface else MediaInterface()
292
299
  self.thread = None
293
- self.thread_stop = False
300
+ self.thread_stop = threading.Event()
294
301
  self.queue = queue.Queue(size)
295
302
  self.path = Path(path)
296
303
  self.output_dir = output_dir
@@ -323,16 +330,20 @@ else:
323
330
  Reset itertor by stopping the thread and clearing the queue.
324
331
  """
325
332
  if self.thread:
326
- self.thread_stop = True
333
+ self.thread_stop.set()
327
334
  self.thread.join()
328
- self.thread_stop = False
329
- while self.queue.qsize() != 0:
330
- with self.queue.mutex:
331
- self.queue.queue.clear()
335
+ self.thread = None
336
+ try:
337
+ while True:
338
+ self.queue.get_nowait()
339
+ except Exception:
340
+ pass
341
+ finally:
342
+ self.thread_stop.clear()
332
343
 
333
344
  def __iter__(self):
334
345
  self.stop()
335
- self.thread_stop = False
346
+ self.thread_stop.clear()
336
347
  self.thread = threading.Thread(
337
348
  target=load_data,
338
349
  args=(
@@ -5,8 +5,9 @@
5
5
 
6
6
  import logging
7
7
  import math
8
- import multiprocessing as mp
9
8
  import os
9
+ import sys
10
+ import multiprocessing as mp
10
11
  from threading import Lock
11
12
  from typing import Any, Callable, Optional
12
13
 
@@ -103,7 +104,12 @@ class ProcessWorkerPoolSingleton:
103
104
  The total number of worker processes to start.
104
105
  """
105
106
  self._total_workers = total_max_workers
106
- self._context: mp.context.ForkContext = mp.get_context("fork")
107
+
108
+ start_method = "fork"
109
+ if sys.platform.lower() == "darwin":
110
+ start_method = "spawn"
111
+ self._context: mp.context.ForkContext = mp.get_context(start_method)
112
+
107
113
  # Bounded task queue: maximum tasks queued = 2 * total_max_workers.
108
114
  self._task_queue: mp.Queue = self._context.Queue(maxsize=2 * total_max_workers)
109
115
  self._next_task_id: int = 0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.10.27.dev20251027
3
+ Version: 2025.10.29.dev20251029
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -116,7 +116,7 @@ nv_ingest_api/util/converters/dftools.py,sha256=FjHjazIeiUd1LdFwWuummJmraqZe1a90
116
116
  nv_ingest_api/util/converters/formats.py,sha256=L11FtormO2SeHSebbwsGE_uuCv6Jk0D3VvVW2avU0vI,2258
117
117
  nv_ingest_api/util/converters/type_mappings.py,sha256=5TVXRyU6BlQvFOdqknEuQw3ss4PXeCvSUynJnjvgQpA,1102
118
118
  nv_ingest_api/util/dataloader/__init__.py,sha256=B6ybDORMI9IzXGdhM7w_agcVj1BNYgAlcfTA0lG5jng,308
119
- nv_ingest_api/util/dataloader/dataloader.py,sha256=r_TU-RfdYerl3k3jRsGIVByxejwz-UQDuallx5-YAGM,14790
119
+ nv_ingest_api/util/dataloader/dataloader.py,sha256=1SG0cHKo7X_eBRTVMJ9EFJOfpqe37QqfEoMxeAWxkEU,15124
120
120
  nv_ingest_api/util/detectors/__init__.py,sha256=HIHfzSig66GT0Uk8qsGBm_f13fKYcPtItBicRUWOOVA,183
121
121
  nv_ingest_api/util/detectors/language.py,sha256=TvzcESYY0bn0U4aLY6GjB4VaCWA6XrXxAGZbVzHTMuE,965
122
122
  nv_ingest_api/util/exception_handlers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -147,7 +147,7 @@ nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py,sha256
147
147
  nv_ingest_api/util/metadata/__init__.py,sha256=HIHfzSig66GT0Uk8qsGBm_f13fKYcPtItBicRUWOOVA,183
148
148
  nv_ingest_api/util/metadata/aggregators.py,sha256=YYdvJ1E04eGFZKKHUxXoH6mzLg8nor9Smvnv0qzqK5w,15988
149
149
  nv_ingest_api/util/multi_processing/__init__.py,sha256=4fojP8Rp_5Hu1YAkqGylqTyEZ-HBVVEunn5Z9I99swA,242
150
- nv_ingest_api/util/multi_processing/mp_pool_singleton.py,sha256=dTfP82DgGPaXEJH3jywTO8rNlLZUniD4FFzwv84_giE,7372
150
+ nv_ingest_api/util/multi_processing/mp_pool_singleton.py,sha256=34O7I8Lin5GvO_zNZGbsqEGkDvIbqy_0Eh3ejoPNDVE,7501
151
151
  nv_ingest_api/util/nim/__init__.py,sha256=No45pMstom1Jo0EENT6VEFkZn3YmTha7lYaBZU7xtHk,2116
152
152
  nv_ingest_api/util/pdf/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
153
153
  nv_ingest_api/util/pdf/pdfium.py,sha256=1aPCnPKXHWnncYoMO8HllYjrhODSXIeRBIsSLDevpYs,15667
@@ -165,10 +165,10 @@ nv_ingest_api/util/string_processing/configuration.py,sha256=2HS08msccuPCT0fn_jf
165
165
  nv_ingest_api/util/string_processing/yaml.py,sha256=4Zdmc4474lUZn6kznqaNTlQJwsmRnnJQZ-DvAWLu-zo,2678
166
166
  nv_ingest_api/util/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
167
167
  nv_ingest_api/util/system/hardware_info.py,sha256=1UFM8XE6M3pgQcpbVsCsqDQ7Dj-zzptL-XRE-DEu9UA,27213
168
- nv_ingest_api-2025.10.27.dev20251027.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
168
+ nv_ingest_api-2025.10.29.dev20251029.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
169
169
  udfs/__init__.py,sha256=pXFqPgXIUqHDfj7SAR1Q19tt8KwGv_iMvhHyziz4AYM,205
170
170
  udfs/llm_summarizer_udf.py,sha256=lH5c5NHoT-5ecHC3og_40u1Ujta8SpsKU4X0e4wzbMU,7314
171
- nv_ingest_api-2025.10.27.dev20251027.dist-info/METADATA,sha256=fQdSW5uS_1SsaCF8ddbueLFilG6l51Cg2HaZ076Pj0M,14106
172
- nv_ingest_api-2025.10.27.dev20251027.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
173
- nv_ingest_api-2025.10.27.dev20251027.dist-info/top_level.txt,sha256=I1lseG9FF0CH93SPx4kFblsxFuv190cfzaas_CLNIiw,19
174
- nv_ingest_api-2025.10.27.dev20251027.dist-info/RECORD,,
171
+ nv_ingest_api-2025.10.29.dev20251029.dist-info/METADATA,sha256=0aFHQxZbEF6-N4iGk5aUxQpasZM73Ae49nQFa90k_-Q,14106
172
+ nv_ingest_api-2025.10.29.dev20251029.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
173
+ nv_ingest_api-2025.10.29.dev20251029.dist-info/top_level.txt,sha256=I1lseG9FF0CH93SPx4kFblsxFuv190cfzaas_CLNIiw,19
174
+ nv_ingest_api-2025.10.29.dev20251029.dist-info/RECORD,,