nv-ingest-api 2025.6.8.dev20250608__py3-none-any.whl → 2025.6.9.dev20250609__py3-none-any.whl

This diff shows the differences between two publicly available versions of the package, each of which has been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

@@ -207,7 +207,7 @@ def transform_image_create_vlm_caption(
207
207
  "api_key": api_key,
208
208
  "prompt": prompt,
209
209
  "endpoint_url": endpoint_url,
210
- "image_caption_model_name": model_name,
210
+ "model_name": model_name,
211
211
  }
212
212
  filtered_task_config: Dict[str, str] = {k: v for k, v in task_config.items() if v is not None}
213
213
 
@@ -105,7 +105,7 @@ def _finalize_images(
105
105
  logger.warning(f"Unable to process shape image: {e}")
106
106
 
107
107
  # If you want table/chart detection for these images, do it now
108
- # (similar to docx approach). This might use your YOLO or other method:
108
+ # (similar to docx approach). This might use your YOLO or another method:
109
109
  detection_map = defaultdict(list) # image_idx -> list of CroppedImageWithContent
110
110
  if extract_tables or extract_charts:
111
111
  try:
@@ -407,7 +407,7 @@ def python_pptx(
407
407
  page_nearby_blocks["text"]["bbox"].append(get_bbox(shape_object=shape))
408
408
 
409
409
  # Image processing (deferred)
410
- if extract_images:
410
+ if extract_images or extract_tables or extract_charts:
411
411
  try:
412
412
  process_shape(
413
413
  shape,
@@ -76,7 +76,7 @@ class IngestTaskCaptionSchema(BaseModelNoExt):
76
76
  api_key: Optional[str] = None
77
77
  endpoint_url: Optional[str] = None
78
78
  prompt: Optional[str] = None
79
- caption_model_name: Optional[str] = None
79
+ model_name: Optional[str] = None
80
80
 
81
81
 
82
82
  class IngestTaskFilterParamsSchema(BaseModelNoExt):
@@ -10,6 +10,6 @@ class ImageCaptionExtractionSchema(BaseModel):
10
10
  api_key: str = "api_key"
11
11
  endpoint_url: str = "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-11b-vision-instruct/chat/completions"
12
12
  prompt: str = "Caption the content of this image:"
13
- image_caption_model_name: str = "meta/llama-3.2-11b-vision-instruct"
13
+ model_name: str = "meta/llama-3.2-11b-vision-instruct"
14
14
  raise_on_failure: bool = False
15
15
  model_config = ConfigDict(extra="forbid")
@@ -173,7 +173,7 @@ def transform_image_create_vlm_caption_internal(
173
173
  api_key: str = task_config.get("api_key") or transform_config.api_key
174
174
  prompt: str = task_config.get("prompt") or transform_config.prompt
175
175
  endpoint_url: str = task_config.get("endpoint_url") or transform_config.endpoint_url
176
- model_name: str = task_config.get("image_caption_model_name") or transform_config.image_caption_model_name
176
+ model_name: str = task_config.get("model_name") or transform_config.model_name
177
177
 
178
178
  # Create a mask for rows where the content type is "image".
179
179
  df_mask: pd.Series = df_transform_ledger["metadata"].apply(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.6.8.dev20250608
3
+ Version: 2025.6.9.dev20250609
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -3,7 +3,7 @@ nv_ingest_api/interface/__init__.py,sha256=ltWlfmtCewHSRK4B7DF__QvlSUPuliz58JEcE
3
3
  nv_ingest_api/interface/extract.py,sha256=GyBfXKKTGwSb-y0k0nMiTf4HcCT2E-lxLY4aMYAPeOI,38815
4
4
  nv_ingest_api/interface/mutate.py,sha256=eZkd3sbHEJQiEPJyMbhewlPxQNMnL_Xur15icclnb-U,5934
5
5
  nv_ingest_api/interface/store.py,sha256=aR3Cf19lq9Yo9AHlAy1VVcrOP2dgyN01yYhwxyTprkQ,8207
6
- nv_ingest_api/interface/transform.py,sha256=NtRmS36LqL4P4vivuiDe1MqKaELeuh-rlO-9kb6skAE,15829
6
+ nv_ingest_api/interface/transform.py,sha256=g6YnFR7TpEU0xNtzCvv6kqnFbuCwQ6vRMjjBxz3G4n4,15815
7
7
  nv_ingest_api/interface/utility.py,sha256=oXHV2Miz2BKviQg5vOVfiGSvPs2fKJsPDmnxe3fJL9c,7857
8
8
  nv_ingest_api/internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  nv_ingest_api/internal/enums/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
@@ -39,7 +39,7 @@ nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py,sha256=Jk3wrQ
39
39
  nv_ingest_api/internal/extract/pptx/__init__.py,sha256=HIHfzSig66GT0Uk8qsGBm_f13fKYcPtItBicRUWOOVA,183
40
40
  nv_ingest_api/internal/extract/pptx/pptx_extractor.py,sha256=o-0P2dDyRFW37uQi_lKk6-eFozTcZvbq-2Y4I0EBMIY,7749
41
41
  nv_ingest_api/internal/extract/pptx/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
- nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py,sha256=IZu0c_RHDSJwwclOZD3_tDu5jg4AEEfumbwKB78dUE0,29716
42
+ nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py,sha256=p9-77JoG-ztMBrAFfvveSx94y0OE-Q4oCJfzw2iMVgI,29754
43
43
  nv_ingest_api/internal/mutate/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
44
44
  nv_ingest_api/internal/mutate/deduplicate.py,sha256=hmvTTGevpCtlkM_wVZSoc8-Exr6rUJwqLjoEnbPcPzY,3849
45
45
  nv_ingest_api/internal/mutate/filter.py,sha256=H-hOTBVP-zLpvQr-FoGIJKxkhtj4l_sZ9V2Fgu3rTEM,5183
@@ -82,7 +82,7 @@ nv_ingest_api/internal/schemas/message_brokers/request_schema.py,sha256=LZX_wXDx
82
82
  nv_ingest_api/internal/schemas/message_brokers/response_schema.py,sha256=4b275HlzBSzpmuE2wdoeaGKPCdKki3wuWldtRIfrj8w,727
83
83
  nv_ingest_api/internal/schemas/meta/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
84
84
  nv_ingest_api/internal/schemas/meta/base_model_noext.py,sha256=8hXU1uuiqZ6t8EsoZ8vlC5EFf2zSZrKEX133FcfZMwI,316
85
- nv_ingest_api/internal/schemas/meta/ingest_job_schema.py,sha256=JqlK66fg_eRQE9kwzAX4b15FqOSbpgrzLWkcWrWbKdE,8124
85
+ nv_ingest_api/internal/schemas/meta/ingest_job_schema.py,sha256=szDvgc2A_JetD2Jyewyl4ac4lwpy3NiLxD9dOYz42sM,8116
86
86
  nv_ingest_api/internal/schemas/meta/metadata_schema.py,sha256=KqCUMeBNNgPtpBWzzNhsMtlfr_XvUxnALBbYkI-kfT4,6673
87
87
  nv_ingest_api/internal/schemas/mutate/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
88
88
  nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py,sha256=k1JOdlPPpsipc0XhHf-9YxJ_-W0HvpVE1ZhYmr7fzj0,395
@@ -90,7 +90,7 @@ nv_ingest_api/internal/schemas/store/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQu
90
90
  nv_ingest_api/internal/schemas/store/store_embedding_schema.py,sha256=tdKeiraim9CDL9htgp4oUSCoPMoO5PrHBnlXqDyCpMw,956
91
91
  nv_ingest_api/internal/schemas/store/store_image_schema.py,sha256=p2LGij9i6sG6RYmsfdiQOiWIc2j-POjxYrNuMrp3ELU,1010
92
92
  nv_ingest_api/internal/schemas/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
93
- nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py,sha256=xLxXJsm8QeaL7KPe7m5sP2rd_AuNRMX29rdeVdoei3Y,582
93
+ nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py,sha256=ORXAowdjxBUyfkw95eg2F82DRFqEsuV9PwNKVBulcmY,568
94
94
  nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py,sha256=31ThI5fr0yyENeJeE1xMAA-pxk1QVJLwM842zMate_k,429
95
95
  nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py,sha256=ongmHkJA2953f9_RI7ZYzf5BUnFzVL6Al5E8WKyfgw4,885
96
96
  nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py,sha256=D9K8tvu-tkEBQkZo7uuRzgrHdGyM3ZcNycHbHy5HV2E,791
@@ -98,7 +98,7 @@ nv_ingest_api/internal/store/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8y
98
98
  nv_ingest_api/internal/store/embed_text_upload.py,sha256=maxb4FPsBvWgvlrjAPEBlRZEFdJX5NxPG-p8kUbzV7I,9898
99
99
  nv_ingest_api/internal/store/image_upload.py,sha256=GNlY4k3pfcHv3lzXxkbmGLeHFsf9PI25bkBn6Xn9h3I,9654
100
100
  nv_ingest_api/internal/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
101
- nv_ingest_api/internal/transform/caption_image.py,sha256=RYL_b26zfaRlbHz0XvLw9HwaMlXpNhr7gayjxGzdALQ,8545
101
+ nv_ingest_api/internal/transform/caption_image.py,sha256=0ILCG2F8ESqKtZiPUM-6F1BHUflFZ76Dzi2GNzkE-lU,8517
102
102
  nv_ingest_api/internal/transform/embed_text.py,sha256=F8kg-WXihtuUMwDQUUYjnfGDCdQp1Mkd-jeThOiJT0s,16507
103
103
  nv_ingest_api/internal/transform/split_text.py,sha256=DlVoyHLqZ-6_FiWwZmofPcq7TX8Ta23hIE0St9tw1IY,6822
104
104
  nv_ingest_api/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
@@ -150,8 +150,8 @@ nv_ingest_api/util/service_clients/rest/rest_client.py,sha256=dZ-jrk7IK7oNtHoXFS
150
150
  nv_ingest_api/util/string_processing/__init__.py,sha256=mkwHthyS-IILcLcL1tJYeF6mpqX3pxEw5aUzDGjTSeU,1411
151
151
  nv_ingest_api/util/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
152
152
  nv_ingest_api/util/system/hardware_info.py,sha256=ORZeKpH9kSGU_vuPhyBwkIiMyCViKUX2CP__MCjrfbU,19463
153
- nv_ingest_api-2025.6.8.dev20250608.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
154
- nv_ingest_api-2025.6.8.dev20250608.dist-info/METADATA,sha256=gNSa5IP1pXAw5PlHmyQU7K1RMJZWdsvaIqszQfvevIM,13918
155
- nv_ingest_api-2025.6.8.dev20250608.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
156
- nv_ingest_api-2025.6.8.dev20250608.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
157
- nv_ingest_api-2025.6.8.dev20250608.dist-info/RECORD,,
153
+ nv_ingest_api-2025.6.9.dev20250609.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
154
+ nv_ingest_api-2025.6.9.dev20250609.dist-info/METADATA,sha256=u6ShxLAz31PXFhJkQGiBLGZ_c_N2HEalSTuy2oN24WA,13918
155
+ nv_ingest_api-2025.6.9.dev20250609.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
156
+ nv_ingest_api-2025.6.9.dev20250609.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
157
+ nv_ingest_api-2025.6.9.dev20250609.dist-info/RECORD,,