nv-ingest-client 2025.11.2.dev20251102__tar.gz → 2025.11.5.dev20251105__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-client might be problematic. Click here for more details.

Files changed (62)
  1. {nv_ingest_client-2025.11.2.dev20251102/src/nv_ingest_client.egg-info → nv_ingest_client-2025.11.5.dev20251105}/PKG-INFO +1 -1
  2. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/client/client.py +0 -3
  3. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/client/interface.py +82 -0
  4. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105/src/nv_ingest_client.egg-info}/PKG-INFO +1 -1
  5. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/LICENSE +0 -0
  6. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/MANIFEST.in +0 -0
  7. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/README.md +0 -0
  8. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/pyproject.toml +0 -0
  9. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/setup.cfg +0 -0
  10. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/__init__.py +0 -0
  11. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/cli/__init__.py +0 -0
  12. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/cli/util/__init__.py +0 -0
  13. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/cli/util/click.py +0 -0
  14. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/cli/util/processing.py +0 -0
  15. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/cli/util/system.py +0 -0
  16. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/client/__init__.py +0 -0
  17. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/client/ingest_job_handler.py +0 -0
  18. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/client/util/processing.py +0 -0
  19. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/nv_ingest_cli.py +0 -0
  20. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/__init__.py +0 -0
  21. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/jobs/__init__.py +0 -0
  22. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/jobs/job_spec.py +0 -0
  23. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/jobs/job_state.py +0 -0
  24. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/tasks/__init__.py +0 -0
  25. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/tasks/audio_extraction.py +0 -0
  26. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/tasks/caption.py +0 -0
  27. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/tasks/chart_extraction.py +0 -0
  28. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/tasks/dedup.py +0 -0
  29. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/tasks/embed.py +0 -0
  30. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/tasks/extract.py +0 -0
  31. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/tasks/filter.py +0 -0
  32. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/tasks/infographic_extraction.py +0 -0
  33. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/tasks/split.py +0 -0
  34. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/tasks/store.py +0 -0
  35. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/tasks/table_extraction.py +0 -0
  36. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/tasks/task_base.py +0 -0
  37. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/tasks/task_factory.py +0 -0
  38. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/tasks/udf.py +0 -0
  39. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/primitives/tasks/vdb_upload.py +0 -0
  40. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/util/__init__.py +0 -0
  41. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/util/dataset.py +0 -0
  42. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/util/document_analysis.py +0 -0
  43. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/util/file_processing/__init__.py +0 -0
  44. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/util/file_processing/extract.py +0 -0
  45. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/util/image_disk_utils.py +0 -0
  46. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/util/milvus.py +0 -0
  47. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/util/process_json_files.py +0 -0
  48. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/util/processing.py +0 -0
  49. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/util/system.py +0 -0
  50. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/util/transport.py +0 -0
  51. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/util/util.py +0 -0
  52. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/util/vdb/__init__.py +0 -0
  53. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/util/vdb/adt_vdb.py +0 -0
  54. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/util/vdb/milvus.py +0 -0
  55. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/util/vdb/opensearch.py +0 -0
  56. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client/util/zipkin.py +0 -0
  57. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client.egg-info/SOURCES.txt +0 -0
  58. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client.egg-info/dependency_links.txt +0 -0
  59. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client.egg-info/entry_points.txt +0 -0
  60. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client.egg-info/requires.txt +0 -0
  61. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/nv_ingest_client.egg-info/top_level.txt +0 -0
  62. {nv_ingest_client-2025.11.2.dev20251102 → nv_ingest_client-2025.11.5.dev20251105}/src/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.11.2.dev20251102
3
+ Version: 2025.11.5.dev20251105
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -1705,9 +1705,6 @@ class NvIngestClient:
1705
1705
  )
1706
1706
  logger.error(error_msg)
1707
1707
  failures.append((self._job_index_to_job_spec[job_id].source_id, str(e)))
1708
- finally:
1709
- # Clean up the job spec mapping
1710
- del self._job_index_to_job_spec[job_id]
1711
1708
 
1712
1709
  if return_failures:
1713
1710
  return results, failures
@@ -1359,3 +1359,85 @@ class Ingestor:
1359
1359
  terminal_jobs = self.completed_jobs() + self.failed_jobs() + self.cancelled_jobs()
1360
1360
 
1361
1361
  return len(self._job_states) - terminal_jobs
1362
+
1363
+ def get_status(self) -> Dict[str, str]:
1364
+ """
1365
+ Returns a dictionary mapping document identifiers to their current status in the pipeline.
1366
+
1367
+ This method is designed for use with async ingestion to poll the status of submitted jobs.
1368
+ For each document submitted to the ingestor, the method returns its current processing state.
1369
+
1370
+ Returns
1371
+ -------
1372
+ Dict[str, str]
1373
+ A dictionary where:
1374
+ - Keys are document identifiers (source names or source IDs)
1375
+ - Values are status strings representing the current state:
1376
+ * "pending": Job created but not yet submitted
1377
+ * "submitted": Job submitted and waiting for processing
1378
+ * "processing": Job is currently being processed
1379
+ * "completed": Job finished successfully
1380
+ * "failed": Job encountered an error
1381
+ * "cancelled": Job was cancelled
1382
+ * "unknown": Job state could not be determined (initial state)
1383
+
1384
+ Examples
1385
+ --------
1386
+ >>> ingestor = Ingestor(documents=["doc1.pdf", "doc2.pdf"], client=client)
1387
+ >>> ingestor.extract().embed()
1388
+ >>> future = ingestor.ingest_async()
1389
+ >>>
1390
+ >>> # Poll status while processing
1391
+ >>> status = ingestor.get_status()
1392
+ >>> print(status)
1393
+ {'doc1.pdf': 'processing', 'doc2.pdf': 'submitted'}
1394
+ >>>
1395
+ >>> # Check again after some time
1396
+ >>> status = ingestor.get_status()
1397
+ >>> print(status)
1398
+ {'doc1.pdf': 'completed', 'doc2.pdf': 'processing'}
1399
+
1400
+ Notes
1401
+ -----
1402
+ - This method is most useful when called after `ingest_async()` to track progress
1403
+ - If called before any jobs are submitted, returns an empty dictionary or
1404
+ documents with "unknown" status
1405
+ - The method accesses internal job state from the client, so it reflects
1406
+ the most current known state
1407
+ """
1408
+ status_dict = {}
1409
+
1410
+ if not self._job_states:
1411
+ # If job states haven't been initialized yet (before ingest_async is called)
1412
+ # Return unknown status for all documents
1413
+ for doc in self._documents:
1414
+ doc_name = os.path.basename(doc) if isinstance(doc, str) else str(doc)
1415
+ status_dict[doc_name] = "unknown"
1416
+ return status_dict
1417
+
1418
+ # Map job IDs to their states and source identifiers
1419
+ for job_id, job_state in self._job_states.items():
1420
+ # Get the job spec to find the source identifier
1421
+ job_spec = self._client._job_index_to_job_spec.get(job_id)
1422
+
1423
+ if job_spec:
1424
+ # Use source_name as the key (the document name)
1425
+ source_identifier = job_spec.source_name
1426
+ else:
1427
+ # Fallback to job_id if we can't find the spec
1428
+ source_identifier = f"job_{job_id}"
1429
+
1430
+ # Map the JobStateEnum to a user-friendly string
1431
+ state_mapping = {
1432
+ JobStateEnum.PENDING: "pending",
1433
+ JobStateEnum.SUBMITTED_ASYNC: "submitted",
1434
+ JobStateEnum.SUBMITTED: "submitted",
1435
+ JobStateEnum.PROCESSING: "processing",
1436
+ JobStateEnum.COMPLETED: "completed",
1437
+ JobStateEnum.FAILED: "failed",
1438
+ JobStateEnum.CANCELLED: "cancelled",
1439
+ }
1440
+
1441
+ status_dict[source_identifier] = state_mapping.get(job_state.state, "unknown")
1442
+
1443
+ return status_dict
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.11.2.dev20251102
3
+ Version: 2025.11.5.dev20251105
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License