nv-ingest-client 2025.10.21.dev20251021__py3-none-any.whl → 2025.10.23.dev20251023__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-client might be problematic. Click here for more details.
- nv_ingest_client/client/ingest_job_handler.py +27 -6
- nv_ingest_client/primitives/tasks/filter.py +1 -1
- {nv_ingest_client-2025.10.21.dev20251021.dist-info → nv_ingest_client-2025.10.23.dev20251023.dist-info}/METADATA +1 -1
- {nv_ingest_client-2025.10.21.dev20251021.dist-info → nv_ingest_client-2025.10.23.dev20251023.dist-info}/RECORD +8 -8
- {nv_ingest_client-2025.10.21.dev20251021.dist-info → nv_ingest_client-2025.10.23.dev20251023.dist-info}/WHEEL +0 -0
- {nv_ingest_client-2025.10.21.dev20251021.dist-info → nv_ingest_client-2025.10.23.dev20251023.dist-info}/entry_points.txt +0 -0
- {nv_ingest_client-2025.10.21.dev20251021.dist-info → nv_ingest_client-2025.10.23.dev20251023.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_client-2025.10.21.dev20251021.dist-info → nv_ingest_client-2025.10.23.dev20251023.dist-info}/top_level.txt +0 -0
|
@@ -329,12 +329,33 @@ class IngestJobHandler:
|
|
|
329
329
|
job_id: str = futures_dict[future]
|
|
330
330
|
trace_ids[job_id_map[job_id]] = trace_id
|
|
331
331
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
332
|
+
# Extract page count: prefer V2 metadata location, fall back to V1
|
|
333
|
+
page_count = None
|
|
334
|
+
source_name = None
|
|
335
|
+
|
|
336
|
+
# Try V2 metadata location first (top-level metadata.total_pages)
|
|
337
|
+
if "metadata" in future_response and future_response["metadata"]:
|
|
338
|
+
response_metadata = future_response["metadata"]
|
|
339
|
+
page_count = response_metadata.get("total_pages")
|
|
340
|
+
source_name = response_metadata.get("original_source_name")
|
|
341
|
+
|
|
342
|
+
# Fall back to V1 location (first data element's hierarchy.page_count)
|
|
343
|
+
if page_count is None and future_response.get("data"):
|
|
344
|
+
try:
|
|
345
|
+
first_page_metadata = future_response["data"][0]["metadata"]
|
|
346
|
+
page_count = first_page_metadata["content_metadata"]["hierarchy"]["page_count"]
|
|
347
|
+
source_name = first_page_metadata["source_metadata"]["source_name"]
|
|
348
|
+
except (KeyError, IndexError, TypeError):
|
|
349
|
+
# If we can't extract from V1 location, use defaults
|
|
350
|
+
pass
|
|
351
|
+
|
|
352
|
+
# Use extracted values or defaults
|
|
353
|
+
if page_count is None:
|
|
354
|
+
page_count = 0 # Default if not found
|
|
355
|
+
if source_name is None:
|
|
356
|
+
source_name = "unknown_source"
|
|
357
|
+
|
|
358
|
+
file_page_counts: Dict[str, int] = {source_name: page_count}
|
|
338
359
|
|
|
339
360
|
if self.output_directory:
|
|
340
361
|
self._save_response_data(
|
|
@@ -7,7 +7,7 @@ nv_ingest_client/cli/util/processing.py,sha256=ULGCYQF1RTDQV_b35YM1WQRqIjR2wQRMJ
|
|
|
7
7
|
nv_ingest_client/cli/util/system.py,sha256=AQLq0DD2Ns8jRanrKu1tmVBKPA9rl-F3-ZsGI6FXLqE,1105
|
|
8
8
|
nv_ingest_client/client/__init__.py,sha256=eEX9l1qmkLH2lAAZU3eP17SCV06ZjjrshHAB_xbboHA,375
|
|
9
9
|
nv_ingest_client/client/client.py,sha256=Ic7FPXGN4o-qk0atcbVcofE0ytgW16-B-KqJtXjY8ws,74461
|
|
10
|
-
nv_ingest_client/client/ingest_job_handler.py,sha256=
|
|
10
|
+
nv_ingest_client/client/ingest_job_handler.py,sha256=fwyF48ObgusbSaeMxIgwOG0cuAUC8LYJc7FfxxCLOi0,18346
|
|
11
11
|
nv_ingest_client/client/interface.py,sha256=7G2M59FayYyxvTwP6YCSeB42l6bMfpBNmd4kadKK6iU,50890
|
|
12
12
|
nv_ingest_client/client/util/processing.py,sha256=Ky7x7QbLn3BlgYwmrmoIc-o1VwmlmrcP9tn7GVTi0t0,2502
|
|
13
13
|
nv_ingest_client/primitives/__init__.py,sha256=3rbpLCI7Bl0pntGatAxXD_V01y6dcLhHFheI3wqet-I,269
|
|
@@ -21,7 +21,7 @@ nv_ingest_client/primitives/tasks/chart_extraction.py,sha256=s5hsljgSXxQMZHGekpA
|
|
|
21
21
|
nv_ingest_client/primitives/tasks/dedup.py,sha256=qort6p3t6ZJuK_74sfOOLp3vMT3hkB5DAu3467WenyY,1719
|
|
22
22
|
nv_ingest_client/primitives/tasks/embed.py,sha256=I6Irmvm1Qj9oqzDGSgfykCtfz8pz9LNxiXO-t29nXv8,5916
|
|
23
23
|
nv_ingest_client/primitives/tasks/extract.py,sha256=bRriVkQyXN-UwzprHIt4Lp0iwmAojLEXqBb-IUrf3vY,9328
|
|
24
|
-
nv_ingest_client/primitives/tasks/filter.py,sha256=
|
|
24
|
+
nv_ingest_client/primitives/tasks/filter.py,sha256=dr6fWnh94i50MsGbrz9m_oN6DJKWIWsp7sMwm6Mjz8A,2617
|
|
25
25
|
nv_ingest_client/primitives/tasks/infographic_extraction.py,sha256=SyTjZQbdVA3QwM5yVm4fUzE4Gu4zm4tAfNLDZMvySV8,1537
|
|
26
26
|
nv_ingest_client/primitives/tasks/split.py,sha256=8UkB3EialsOTEbsOZLxzmnDIfTJzC6uvjNv21IbgAVA,2332
|
|
27
27
|
nv_ingest_client/primitives/tasks/store.py,sha256=nIOnCH8vw4FLCLVBJYnsS5Unc0QmuO_jEtUp7-E9FU4,4199
|
|
@@ -47,9 +47,9 @@ nv_ingest_client/util/vdb/__init__.py,sha256=ZmoEzeM9LzwwrVvu_DVUnjRNx-x8ahkNeIr
|
|
|
47
47
|
nv_ingest_client/util/vdb/adt_vdb.py,sha256=UubzAMSfyrqqpD-OQErpBs25hC2Mw8zGZ4waenGXPOk,515
|
|
48
48
|
nv_ingest_client/util/vdb/milvus.py,sha256=uJUnH9gv8JYKvmI3BbljEsyRhV3l9-jP4F4sKOcfsWE,78702
|
|
49
49
|
nv_ingest_client/util/vdb/opensearch.py,sha256=I4FzF95VWCOkyzhfm-szdfK1Zd9ugUc8AxxpAdEMWGE,7538
|
|
50
|
-
nv_ingest_client-2025.10.
|
|
51
|
-
nv_ingest_client-2025.10.
|
|
52
|
-
nv_ingest_client-2025.10.
|
|
53
|
-
nv_ingest_client-2025.10.
|
|
54
|
-
nv_ingest_client-2025.10.
|
|
55
|
-
nv_ingest_client-2025.10.
|
|
50
|
+
nv_ingest_client-2025.10.23.dev20251023.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
51
|
+
nv_ingest_client-2025.10.23.dev20251023.dist-info/METADATA,sha256=kNX66Gfeb4LT9HE3fK6QWHOI-skdgupXHDXGz3dhln8,30627
|
|
52
|
+
nv_ingest_client-2025.10.23.dev20251023.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
53
|
+
nv_ingest_client-2025.10.23.dev20251023.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
|
|
54
|
+
nv_ingest_client-2025.10.23.dev20251023.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
|
|
55
|
+
nv_ingest_client-2025.10.23.dev20251023.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|