nv-ingest-client 2025.10.22.dev20251022__py3-none-any.whl → 2025.10.24.dev20251024__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-client might be problematic. Click here for more details.

@@ -329,12 +329,33 @@ class IngestJobHandler:
329
329
  job_id: str = futures_dict[future]
330
330
  trace_ids[job_id_map[job_id]] = trace_id
331
331
 
332
- first_page_metadata = future_response["data"][0]["metadata"]
333
- file_page_counts: Dict[str, int] = {
334
- first_page_metadata["source_metadata"]["source_name"]: first_page_metadata[
335
- "content_metadata"
336
- ]["hierarchy"]["page_count"]
337
- }
332
+ # Extract page count: prefer V2 metadata location, fall back to V1
333
+ page_count = None
334
+ source_name = None
335
+
336
+ # Try V2 metadata location first (top-level metadata.total_pages)
337
+ if "metadata" in future_response and future_response["metadata"]:
338
+ response_metadata = future_response["metadata"]
339
+ page_count = response_metadata.get("total_pages")
340
+ source_name = response_metadata.get("original_source_name")
341
+
342
+ # Fall back to V1 location (first data element's hierarchy.page_count)
343
+ if page_count is None and future_response.get("data"):
344
+ try:
345
+ first_page_metadata = future_response["data"][0]["metadata"]
346
+ page_count = first_page_metadata["content_metadata"]["hierarchy"]["page_count"]
347
+ source_name = first_page_metadata["source_metadata"]["source_name"]
348
+ except (KeyError, IndexError, TypeError):
349
+ # If we can't extract from V1 location, use defaults
350
+ pass
351
+
352
+ # Use extracted values or defaults
353
+ if page_count is None:
354
+ page_count = 0 # Default if not found
355
+ if source_name is None:
356
+ source_name = "unknown_source"
357
+
358
+ file_page_counts: Dict[str, int] = {source_name: page_count}
338
359
 
339
360
  if self.output_directory:
340
361
  self._save_response_data(
@@ -31,7 +31,7 @@ class FilterTask(Task):
31
31
  min_size: int = 128,
32
32
  max_aspect_ratio: Union[int, float] = 5.0,
33
33
  min_aspect_ratio: Union[int, float] = 0.2,
34
- filter: bool = False,
34
+ filter: bool = True,
35
35
  ) -> None:
36
36
  """
37
37
  Setup Filter Task Config
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.10.22.dev20251022
3
+ Version: 2025.10.24.dev20251024
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -7,7 +7,7 @@ nv_ingest_client/cli/util/processing.py,sha256=ULGCYQF1RTDQV_b35YM1WQRqIjR2wQRMJ
7
7
  nv_ingest_client/cli/util/system.py,sha256=AQLq0DD2Ns8jRanrKu1tmVBKPA9rl-F3-ZsGI6FXLqE,1105
8
8
  nv_ingest_client/client/__init__.py,sha256=eEX9l1qmkLH2lAAZU3eP17SCV06ZjjrshHAB_xbboHA,375
9
9
  nv_ingest_client/client/client.py,sha256=Ic7FPXGN4o-qk0atcbVcofE0ytgW16-B-KqJtXjY8ws,74461
10
- nv_ingest_client/client/ingest_job_handler.py,sha256=lMk-yQ0b0aK5ucxfNPVhxofzORIIK0jDzCYTmfdMZFw,17059
10
+ nv_ingest_client/client/ingest_job_handler.py,sha256=fwyF48ObgusbSaeMxIgwOG0cuAUC8LYJc7FfxxCLOi0,18346
11
11
  nv_ingest_client/client/interface.py,sha256=7G2M59FayYyxvTwP6YCSeB42l6bMfpBNmd4kadKK6iU,50890
12
12
  nv_ingest_client/client/util/processing.py,sha256=Ky7x7QbLn3BlgYwmrmoIc-o1VwmlmrcP9tn7GVTi0t0,2502
13
13
  nv_ingest_client/primitives/__init__.py,sha256=3rbpLCI7Bl0pntGatAxXD_V01y6dcLhHFheI3wqet-I,269
@@ -21,7 +21,7 @@ nv_ingest_client/primitives/tasks/chart_extraction.py,sha256=s5hsljgSXxQMZHGekpA
21
21
  nv_ingest_client/primitives/tasks/dedup.py,sha256=qort6p3t6ZJuK_74sfOOLp3vMT3hkB5DAu3467WenyY,1719
22
22
  nv_ingest_client/primitives/tasks/embed.py,sha256=I6Irmvm1Qj9oqzDGSgfykCtfz8pz9LNxiXO-t29nXv8,5916
23
23
  nv_ingest_client/primitives/tasks/extract.py,sha256=bRriVkQyXN-UwzprHIt4Lp0iwmAojLEXqBb-IUrf3vY,9328
24
- nv_ingest_client/primitives/tasks/filter.py,sha256=wjcfSBGhdEyPh2tf42NMcyKZziigm24CO9B4obpQytU,2618
24
+ nv_ingest_client/primitives/tasks/filter.py,sha256=dr6fWnh94i50MsGbrz9m_oN6DJKWIWsp7sMwm6Mjz8A,2617
25
25
  nv_ingest_client/primitives/tasks/infographic_extraction.py,sha256=SyTjZQbdVA3QwM5yVm4fUzE4Gu4zm4tAfNLDZMvySV8,1537
26
26
  nv_ingest_client/primitives/tasks/split.py,sha256=8UkB3EialsOTEbsOZLxzmnDIfTJzC6uvjNv21IbgAVA,2332
27
27
  nv_ingest_client/primitives/tasks/store.py,sha256=nIOnCH8vw4FLCLVBJYnsS5Unc0QmuO_jEtUp7-E9FU4,4199
@@ -47,9 +47,9 @@ nv_ingest_client/util/vdb/__init__.py,sha256=ZmoEzeM9LzwwrVvu_DVUnjRNx-x8ahkNeIr
47
47
  nv_ingest_client/util/vdb/adt_vdb.py,sha256=UubzAMSfyrqqpD-OQErpBs25hC2Mw8zGZ4waenGXPOk,515
48
48
  nv_ingest_client/util/vdb/milvus.py,sha256=uJUnH9gv8JYKvmI3BbljEsyRhV3l9-jP4F4sKOcfsWE,78702
49
49
  nv_ingest_client/util/vdb/opensearch.py,sha256=I4FzF95VWCOkyzhfm-szdfK1Zd9ugUc8AxxpAdEMWGE,7538
50
- nv_ingest_client-2025.10.22.dev20251022.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
51
- nv_ingest_client-2025.10.22.dev20251022.dist-info/METADATA,sha256=F8GG56dInsRlhxwtaFangHWMf6QvIPnkfIy6y4jX_qI,30627
52
- nv_ingest_client-2025.10.22.dev20251022.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
53
- nv_ingest_client-2025.10.22.dev20251022.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
54
- nv_ingest_client-2025.10.22.dev20251022.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
55
- nv_ingest_client-2025.10.22.dev20251022.dist-info/RECORD,,
50
+ nv_ingest_client-2025.10.24.dev20251024.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
51
+ nv_ingest_client-2025.10.24.dev20251024.dist-info/METADATA,sha256=Kytxu5r2qPRsaE8Z2UvAocrXbNEcQgxbEShBQ-ZW_KQ,30627
52
+ nv_ingest_client-2025.10.24.dev20251024.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
53
+ nv_ingest_client-2025.10.24.dev20251024.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
54
+ nv_ingest_client-2025.10.24.dev20251024.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
55
+ nv_ingest_client-2025.10.24.dev20251024.dist-info/RECORD,,