nv-ingest-client 2025.10.21.dev20251021__tar.gz → 2025.10.23.dev20251023__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-client might be problematic. Click here for more details.

Files changed (62)
  1. {nv_ingest_client-2025.10.21.dev20251021/src/nv_ingest_client.egg-info → nv_ingest_client-2025.10.23.dev20251023}/PKG-INFO +1 -1
  2. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/client/ingest_job_handler.py +27 -6
  3. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/tasks/filter.py +1 -1
  4. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023/src/nv_ingest_client.egg-info}/PKG-INFO +1 -1
  5. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/LICENSE +0 -0
  6. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/MANIFEST.in +0 -0
  7. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/README.md +0 -0
  8. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/pyproject.toml +0 -0
  9. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/setup.cfg +0 -0
  10. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/__init__.py +0 -0
  11. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/cli/__init__.py +0 -0
  12. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/cli/util/__init__.py +0 -0
  13. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/cli/util/click.py +0 -0
  14. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/cli/util/processing.py +0 -0
  15. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/cli/util/system.py +0 -0
  16. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/client/__init__.py +0 -0
  17. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/client/client.py +0 -0
  18. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/client/interface.py +0 -0
  19. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/client/util/processing.py +0 -0
  20. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/nv_ingest_cli.py +0 -0
  21. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/__init__.py +0 -0
  22. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/jobs/__init__.py +0 -0
  23. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/jobs/job_spec.py +0 -0
  24. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/jobs/job_state.py +0 -0
  25. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/tasks/__init__.py +0 -0
  26. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/tasks/audio_extraction.py +0 -0
  27. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/tasks/caption.py +0 -0
  28. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/tasks/chart_extraction.py +0 -0
  29. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/tasks/dedup.py +0 -0
  30. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/tasks/embed.py +0 -0
  31. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/tasks/extract.py +0 -0
  32. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/tasks/infographic_extraction.py +0 -0
  33. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/tasks/split.py +0 -0
  34. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/tasks/store.py +0 -0
  35. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/tasks/table_extraction.py +0 -0
  36. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/tasks/task_base.py +0 -0
  37. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/tasks/task_factory.py +0 -0
  38. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/tasks/udf.py +0 -0
  39. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/primitives/tasks/vdb_upload.py +0 -0
  40. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/util/__init__.py +0 -0
  41. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/util/dataset.py +0 -0
  42. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/util/document_analysis.py +0 -0
  43. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/util/file_processing/__init__.py +0 -0
  44. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/util/file_processing/extract.py +0 -0
  45. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/util/image_disk_utils.py +0 -0
  46. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/util/milvus.py +0 -0
  47. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/util/process_json_files.py +0 -0
  48. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/util/processing.py +0 -0
  49. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/util/system.py +0 -0
  50. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/util/transport.py +0 -0
  51. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/util/util.py +0 -0
  52. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/util/vdb/__init__.py +0 -0
  53. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/util/vdb/adt_vdb.py +0 -0
  54. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/util/vdb/milvus.py +0 -0
  55. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/util/vdb/opensearch.py +0 -0
  56. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client/util/zipkin.py +0 -0
  57. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client.egg-info/SOURCES.txt +0 -0
  58. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client.egg-info/dependency_links.txt +0 -0
  59. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client.egg-info/entry_points.txt +0 -0
  60. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client.egg-info/requires.txt +0 -0
  61. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/nv_ingest_client.egg-info/top_level.txt +0 -0
  62. {nv_ingest_client-2025.10.21.dev20251021 → nv_ingest_client-2025.10.23.dev20251023}/src/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.10.21.dev20251021
3
+ Version: 2025.10.23.dev20251023
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -329,12 +329,33 @@ class IngestJobHandler:
329
329
  job_id: str = futures_dict[future]
330
330
  trace_ids[job_id_map[job_id]] = trace_id
331
331
 
332
- first_page_metadata = future_response["data"][0]["metadata"]
333
- file_page_counts: Dict[str, int] = {
334
- first_page_metadata["source_metadata"]["source_name"]: first_page_metadata[
335
- "content_metadata"
336
- ]["hierarchy"]["page_count"]
337
- }
332
+ # Extract page count: prefer V2 metadata location, fall back to V1
333
+ page_count = None
334
+ source_name = None
335
+
336
+ # Try V2 metadata location first (top-level metadata.total_pages)
337
+ if "metadata" in future_response and future_response["metadata"]:
338
+ response_metadata = future_response["metadata"]
339
+ page_count = response_metadata.get("total_pages")
340
+ source_name = response_metadata.get("original_source_name")
341
+
342
+ # Fall back to V1 location (first data element's hierarchy.page_count)
343
+ if page_count is None and future_response.get("data"):
344
+ try:
345
+ first_page_metadata = future_response["data"][0]["metadata"]
346
+ page_count = first_page_metadata["content_metadata"]["hierarchy"]["page_count"]
347
+ source_name = first_page_metadata["source_metadata"]["source_name"]
348
+ except (KeyError, IndexError, TypeError):
349
+ # If we can't extract from V1 location, use defaults
350
+ pass
351
+
352
+ # Use extracted values or defaults
353
+ if page_count is None:
354
+ page_count = 0 # Default if not found
355
+ if source_name is None:
356
+ source_name = "unknown_source"
357
+
358
+ file_page_counts: Dict[str, int] = {source_name: page_count}
338
359
 
339
360
  if self.output_directory:
340
361
  self._save_response_data(
@@ -31,7 +31,7 @@ class FilterTask(Task):
31
31
  min_size: int = 128,
32
32
  max_aspect_ratio: Union[int, float] = 5.0,
33
33
  min_aspect_ratio: Union[int, float] = 0.2,
34
- filter: bool = False,
34
+ filter: bool = True,
35
35
  ) -> None:
36
36
  """
37
37
  Setup Filter Task Config
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.10.21.dev20251021
3
+ Version: 2025.10.23.dev20251023
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License