nv-ingest-client 2025.10.31.dev20251031__py3-none-any.whl → 2025.11.5.dev20251105__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- nv_ingest_client/client/client.py +0 -3
- nv_ingest_client/client/interface.py +82 -0
- {nv_ingest_client-2025.10.31.dev20251031.dist-info → nv_ingest_client-2025.11.5.dev20251105.dist-info}/METADATA +1 -1
- {nv_ingest_client-2025.10.31.dev20251031.dist-info → nv_ingest_client-2025.11.5.dev20251105.dist-info}/RECORD +8 -8
- {nv_ingest_client-2025.10.31.dev20251031.dist-info → nv_ingest_client-2025.11.5.dev20251105.dist-info}/WHEEL +0 -0
- {nv_ingest_client-2025.10.31.dev20251031.dist-info → nv_ingest_client-2025.11.5.dev20251105.dist-info}/entry_points.txt +0 -0
- {nv_ingest_client-2025.10.31.dev20251031.dist-info → nv_ingest_client-2025.11.5.dev20251105.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_client-2025.10.31.dev20251031.dist-info → nv_ingest_client-2025.11.5.dev20251105.dist-info}/top_level.txt +0 -0
|
@@ -1705,9 +1705,6 @@ class NvIngestClient:
|
|
|
1705
1705
|
)
|
|
1706
1706
|
logger.error(error_msg)
|
|
1707
1707
|
failures.append((self._job_index_to_job_spec[job_id].source_id, str(e)))
|
|
1708
|
-
finally:
|
|
1709
|
-
# Clean up the job spec mapping
|
|
1710
|
-
del self._job_index_to_job_spec[job_id]
|
|
1711
1708
|
|
|
1712
1709
|
if return_failures:
|
|
1713
1710
|
return results, failures
|
|
@@ -1359,3 +1359,85 @@ class Ingestor:
|
|
|
1359
1359
|
terminal_jobs = self.completed_jobs() + self.failed_jobs() + self.cancelled_jobs()
|
|
1360
1360
|
|
|
1361
1361
|
return len(self._job_states) - terminal_jobs
|
|
1362
|
+
|
|
1363
|
+
def get_status(self) -> Dict[str, str]:
    """
    Return a dictionary mapping document identifiers to their current pipeline status.

    This method is designed for use with async ingestion to poll the status of
    submitted jobs. For each job tracked in ``self._job_states`` it reports the
    current processing state as a plain string.

    Returns
    -------
    Dict[str, str]
        A dictionary where:
        - Keys are document identifiers:
            * While no job states are tracked yet: the basename of each string
              entry in ``self._documents`` (``str(doc)`` for non-string entries)
            * Otherwise: the ``source_name`` of the job's spec, or
              ``"job_<job_id>"`` when the client holds no spec for that job ID
        - Values are status strings representing the current state:
            * "pending": Job created but not yet submitted
            * "submitted": Job submitted and waiting for processing
            * "processing": Job is currently being processed
            * "completed": Job finished successfully
            * "failed": Job encountered an error
            * "cancelled": Job was cancelled
            * "unknown": No job state is tracked yet, or the state has no
              entry in the mapping below

    Examples
    --------
    >>> ingestor = Ingestor(documents=["doc1.pdf", "doc2.pdf"], client=client)
    >>> ingestor.extract().embed()
    >>> future = ingestor.ingest_async()
    >>>
    >>> # Poll status while processing
    >>> status = ingestor.get_status()
    >>> print(status)
    {'doc1.pdf': 'processing', 'doc2.pdf': 'submitted'}
    >>>
    >>> # Check again after some time
    >>> status = ingestor.get_status()
    >>> print(status)
    {'doc1.pdf': 'completed', 'doc2.pdf': 'processing'}

    Notes
    -----
    - This method is most useful when called after `ingest_async()` to track progress
    - If called while no job states are tracked, every entry of
      ``self._documents`` is reported as "unknown" (an empty dictionary when
      there are no documents)
    - Job specs are read from the client's private ``_job_index_to_job_spec``
      mapping, so the result reflects whatever the client currently holds
    - If two entries resolve to the same key, the later one overwrites the earlier
    """
    if not self._job_states:
        # Job states haven't been initialized yet (before ingest_async is called):
        # report every known document as "unknown".
        return {
            (os.path.basename(doc) if isinstance(doc, str) else str(doc)): "unknown"
            for doc in self._documents
        }

    # JobStateEnum -> user-friendly string. Built once per call rather than
    # once per job, since it does not depend on the loop variables.
    state_labels = {
        JobStateEnum.PENDING: "pending",
        JobStateEnum.SUBMITTED_ASYNC: "submitted",
        JobStateEnum.SUBMITTED: "submitted",
        JobStateEnum.PROCESSING: "processing",
        JobStateEnum.COMPLETED: "completed",
        JobStateEnum.FAILED: "failed",
        JobStateEnum.CANCELLED: "cancelled",
    }
    job_specs = self._client._job_index_to_job_spec

    status_dict: Dict[str, str] = {}
    for job_id, job_state in self._job_states.items():
        job_spec = job_specs.get(job_id)

        # `.get` yields None on a miss; test for that explicitly so the check
        # does not depend on the truthiness of the spec object itself.
        if job_spec is not None:
            # Use source_name as the key (the document name)
            source_identifier = job_spec.source_name
        else:
            # Fallback to job_id if we can't find the spec
            source_identifier = f"job_{job_id}"

        status_dict[source_identifier] = state_labels.get(job_state.state, "unknown")

    return status_dict
|
|
@@ -6,9 +6,9 @@ nv_ingest_client/cli/util/click.py,sha256=YjQU1uF148FU5D3ozC2m1kkfOOJxO1U8U552-T
|
|
|
6
6
|
nv_ingest_client/cli/util/processing.py,sha256=ULGCYQF1RTDQV_b35YM1WQRqIjR2wQRMJWu41DogagE,6259
|
|
7
7
|
nv_ingest_client/cli/util/system.py,sha256=AQLq0DD2Ns8jRanrKu1tmVBKPA9rl-F3-ZsGI6FXLqE,1105
|
|
8
8
|
nv_ingest_client/client/__init__.py,sha256=eEX9l1qmkLH2lAAZU3eP17SCV06ZjjrshHAB_xbboHA,375
|
|
9
|
-
nv_ingest_client/client/client.py,sha256=
|
|
9
|
+
nv_ingest_client/client/client.py,sha256=3uA54D4Y6lSS-Nvz8R8uzkHkoV8vJu8GPQQRPoc-Uxk,77368
|
|
10
10
|
nv_ingest_client/client/ingest_job_handler.py,sha256=4exvMwXbzwC-tb0dWleXE-AwhJkvxvhkf_u_1bJt30U,18387
|
|
11
|
-
nv_ingest_client/client/interface.py,sha256=
|
|
11
|
+
nv_ingest_client/client/interface.py,sha256=OCbH_5Q-cv1V4HpLBxLdaPCeaNKNkdEYi1JS4Tu6DGY,54745
|
|
12
12
|
nv_ingest_client/client/util/processing.py,sha256=Ky7x7QbLn3BlgYwmrmoIc-o1VwmlmrcP9tn7GVTi0t0,2502
|
|
13
13
|
nv_ingest_client/primitives/__init__.py,sha256=3rbpLCI7Bl0pntGatAxXD_V01y6dcLhHFheI3wqet-I,269
|
|
14
14
|
nv_ingest_client/primitives/jobs/__init__.py,sha256=-yohgHv3LcCtSleHSaxjv1oO7nNcMCjN3ZYoOkIypIk,469
|
|
@@ -47,9 +47,9 @@ nv_ingest_client/util/vdb/__init__.py,sha256=ZmoEzeM9LzwwrVvu_DVUnjRNx-x8ahkNeIr
|
|
|
47
47
|
nv_ingest_client/util/vdb/adt_vdb.py,sha256=UubzAMSfyrqqpD-OQErpBs25hC2Mw8zGZ4waenGXPOk,515
|
|
48
48
|
nv_ingest_client/util/vdb/milvus.py,sha256=6XWRh2SDJlgVZOKZVXG3cZTB4L-ZHIiiTenuIzkxp2Y,78704
|
|
49
49
|
nv_ingest_client/util/vdb/opensearch.py,sha256=I4FzF95VWCOkyzhfm-szdfK1Zd9ugUc8AxxpAdEMWGE,7538
|
|
50
|
-
nv_ingest_client-2025.
|
|
51
|
-
nv_ingest_client-2025.
|
|
52
|
-
nv_ingest_client-2025.
|
|
53
|
-
nv_ingest_client-2025.
|
|
54
|
-
nv_ingest_client-2025.
|
|
55
|
-
nv_ingest_client-2025.
|
|
50
|
+
nv_ingest_client-2025.11.5.dev20251105.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
51
|
+
nv_ingest_client-2025.11.5.dev20251105.dist-info/METADATA,sha256=rB92S3YltqT5qi70cDN7VK1wtRDiOKFMe0vU7Av8tQ4,30626
|
|
52
|
+
nv_ingest_client-2025.11.5.dev20251105.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
53
|
+
nv_ingest_client-2025.11.5.dev20251105.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
|
|
54
|
+
nv_ingest_client-2025.11.5.dev20251105.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
|
|
55
|
+
nv_ingest_client-2025.11.5.dev20251105.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|