nv-ingest 2025.8.12.dev20250812__tar.gz → 2025.8.13.dev20250813__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest might be problematic. Click here for more details.

Files changed (109)
  1. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/PKG-INFO +1 -1
  2. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +79 -0
  3. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest.egg-info/PKG-INFO +1 -1
  4. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/LICENSE +0 -0
  5. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/MANIFEST.in +0 -0
  6. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/__init__.py +0 -0
  7. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/api/__init__.py +0 -0
  8. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/api/main.py +0 -0
  9. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/api/v1/__init__.py +0 -0
  10. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/api/v1/health.py +0 -0
  11. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/api/v1/ingest.py +0 -0
  12. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/api/v1/metrics.py +0 -0
  13. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/__init__.py +0 -0
  14. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/__init__.py +0 -0
  15. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
  16. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
  17. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
  18. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
  19. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
  20. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
  21. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
  22. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
  23. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
  24. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
  25. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
  26. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
  27. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
  28. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
  29. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
  30. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
  31. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
  32. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
  33. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
  34. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
  35. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
  36. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
  37. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
  38. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
  39. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
  40. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
  41. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
  42. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
  43. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
  44. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
  45. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
  46. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
  47. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
  48. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
  49. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
  50. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
  51. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
  52. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
  53. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
  54. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +0 -0
  55. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
  56. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
  57. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
  58. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
  59. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
  60. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
  61. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
  62. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
  63. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
  64. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
  65. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
  66. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
  67. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
  68. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
  69. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/util/env_config.py +0 -0
  70. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
  71. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
  72. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +0 -0
  73. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +0 -0
  74. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +0 -0
  75. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
  76. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
  77. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
  78. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
  79. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/schemas/__init__.py +0 -0
  80. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
  81. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
  82. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
  83. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
  84. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
  85. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
  86. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
  87. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
  88. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
  89. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
  90. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
  91. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/util/__init__.py +0 -0
  92. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
  93. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
  94. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/util/service/__init__.py +0 -0
  95. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
  96. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
  97. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
  98. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
  99. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
  100. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
  101. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
  102. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
  103. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest/version.py +0 -0
  104. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest.egg-info/SOURCES.txt +0 -0
  105. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest.egg-info/dependency_links.txt +0 -0
  106. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest.egg-info/requires.txt +0 -0
  107. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/nv_ingest.egg-info/top_level.txt +0 -0
  108. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/pyproject.toml +0 -0
  109. {nv_ingest-2025.8.12.dev20250812 → nv_ingest-2025.8.13.dev20250813}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.8.12.dev20250812
3
+ Version: 2025.8.13.dev20250813
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -9,6 +9,7 @@ from abc import ABC, abstractmethod
9
9
  from typing import Any, Dict, Optional
10
10
  import os
11
11
  import psutil
12
+ import gc
12
13
 
13
14
  import ray
14
15
  import ray.actor
@@ -129,6 +130,13 @@ class RayActorStage(ABC):
129
130
 
130
131
  self._actor_id_str = self._get_actor_id_str()
131
132
 
133
+ # --- PyArrow memory cleanup configuration/state ---
134
+ # Allow stages to configure the cleanup interval (seconds) via their config.
135
+ # Defaults to 5 minutes if not provided.
136
+ self._memory_cleanup_interval_seconds: int = int(getattr(self.config, "memory_cleanup_interval_seconds", 300))
137
+ self._last_memory_cleanup_time: float = time.time()
138
+ self._memory_cleanups_performed: int = 0
139
+
132
140
  @staticmethod
133
141
  def _get_actor_id_str() -> str:
134
142
  """
@@ -344,6 +352,16 @@ class RayActorStage(ABC):
344
352
  # This is the primary path for "successful processing".
345
353
  self.stats["processed"] += 1
346
354
 
355
+ # Time-based PyArrow memory cleanup check (best-effort, low overhead)
356
+ try:
357
+ current_time = time.time()
358
+ if (current_time - self._last_memory_cleanup_time) >= self._memory_cleanup_interval_seconds:
359
+ self._force_arrow_memory_cleanup()
360
+ self._last_memory_cleanup_time = current_time
361
+ except Exception:
362
+ # Never allow cleanup issues to interfere with processing
363
+ pass
364
+
347
365
  except ray.exceptions.ObjectLostError:
348
366
  # This error is handled inside the loop to prevent the actor from crashing.
349
367
  # We log it and continue to the next message.
@@ -386,10 +404,71 @@ class RayActorStage(ABC):
386
404
  # This block executes when the processing thread is about to exit,
387
405
  # either due to self._running becoming False or an unhandled critical exception.
388
406
  self._logger.debug(f"[{self._actor_id_str}] Processing loop thread finished.")
407
+ # Perform a best-effort final memory cleanup on exit
408
+ try:
409
+ self._force_arrow_memory_cleanup()
410
+ except Exception:
411
+ pass
389
412
  # Signal that this actor's processing duties are complete.
390
413
  # External monitors (e.g., via a future from stop()) can use this signal.
391
414
  self._shutdown_signal_complete = True
392
415
 
416
+ def _force_arrow_memory_cleanup(self) -> None:
417
+ """
418
+ Best-effort memory cleanup for PyArrow allocations.
419
+
420
+ - Runs Python garbage collection to drop unreachable references.
421
+ - If PyArrow is available and its default memory pool supports
422
+ release_unused(), request it to return free pages to the OS.
423
+
424
+ Designed to be safe to call periodically; any failures are logged at
425
+ debug/warning levels and are non-fatal.
426
+ """
427
+ try:
428
+ # First, trigger Python GC to maximize reclaimable memory
429
+ gc.collect()
430
+
431
+ try:
432
+ import pyarrow as pa # Local import to avoid hard dependency at import time
433
+
434
+ pool = pa.default_memory_pool()
435
+ try:
436
+ before_bytes = getattr(pool, "bytes_allocated", lambda: 0)()
437
+ except Exception:
438
+ before_bytes = 0
439
+
440
+ released = False
441
+ if hasattr(pool, "release_unused"):
442
+ try:
443
+ pool.release_unused()
444
+ released = True
445
+ except Exception as e_release:
446
+ self._logger.debug(f"[{self._actor_id_str}] Arrow pool release_unused() failed: {e_release}")
447
+
448
+ try:
449
+ after_bytes = getattr(pool, "bytes_allocated", lambda: before_bytes)()
450
+ except Exception:
451
+ after_bytes = before_bytes
452
+
453
+ if released:
454
+ delta_mb = max(0, (before_bytes - after_bytes) / (1024 * 1024))
455
+ if delta_mb > 0:
456
+ self._logger.debug(
457
+ f"[{self._actor_id_str}] Arrow cleanup released ~{delta_mb:.2f}"
458
+ f" MB (pool now {after_bytes/(1024*1024):.2f} MB)."
459
+ )
460
+ self._memory_cleanups_performed += 1
461
+ except ModuleNotFoundError:
462
+ # PyArrow not present; nothing to do beyond GC.
463
+ self._memory_cleanups_performed += 1
464
+ except Exception as e_pa:
465
+ # Any other PyArrow-related issues are non-fatal.
466
+ self._logger.debug(f"[{self._actor_id_str}] Arrow cleanup skipped due to error: {e_pa}")
467
+ self._memory_cleanups_performed += 1
468
+ except Exception as e:
469
+ # As a last resort, swallow any errors to avoid interfering with the actor loop.
470
+ self._logger.debug(f"[{self._actor_id_str}] Memory cleanup encountered an error: {e}")
471
+
393
472
  def _get_memory_usage_mb(self) -> float:
394
473
  """
395
474
  Gets the total memory usage of the current actor process (RSS).
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.8.12.dev20250812
3
+ Version: 2025.8.13.dev20250813
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License