nv-ingest-api 2025.7.15.dev20250715__py3-none-any.whl → 2025.7.17.dev20250717__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

Files changed (28)
  1. nv_ingest_api/interface/extract.py +18 -18
  2. nv_ingest_api/internal/enums/common.py +6 -0
  3. nv_ingest_api/internal/extract/image/chart_extractor.py +75 -55
  4. nv_ingest_api/internal/extract/image/infographic_extractor.py +59 -35
  5. nv_ingest_api/internal/extract/image/table_extractor.py +81 -63
  6. nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +7 -7
  7. nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +32 -20
  8. nv_ingest_api/internal/extract/pdf/engines/pdfium.py +32 -9
  9. nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +58 -0
  10. nv_ingest_api/internal/primitives/nim/model_interface/{paddle.py → ocr.py} +132 -39
  11. nv_ingest_api/internal/primitives/nim/nim_client.py +46 -11
  12. nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +6 -6
  13. nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +6 -6
  14. nv_ingest_api/internal/schemas/extract/extract_table_schema.py +5 -5
  15. nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +5 -0
  16. nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +4 -0
  17. nv_ingest_api/internal/transform/embed_text.py +103 -12
  18. nv_ingest_api/internal/transform/split_text.py +13 -8
  19. nv_ingest_api/util/image_processing/table_and_chart.py +97 -42
  20. nv_ingest_api/util/image_processing/transforms.py +19 -5
  21. nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +1 -1
  22. nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +51 -48
  23. nv_ingest_api/util/metadata/aggregators.py +4 -1
  24. {nv_ingest_api-2025.7.15.dev20250715.dist-info → nv_ingest_api-2025.7.17.dev20250717.dist-info}/METADATA +1 -1
  25. {nv_ingest_api-2025.7.15.dev20250715.dist-info → nv_ingest_api-2025.7.17.dev20250717.dist-info}/RECORD +28 -28
  26. {nv_ingest_api-2025.7.15.dev20250715.dist-info → nv_ingest_api-2025.7.17.dev20250717.dist-info}/WHEEL +0 -0
  27. {nv_ingest_api-2025.7.15.dev20250715.dist-info → nv_ingest_api-2025.7.17.dev20250717.dist-info}/licenses/LICENSE +0 -0
  28. {nv_ingest_api-2025.7.15.dev20250715.dist-info → nv_ingest_api-2025.7.17.dev20250717.dist-info}/top_level.txt +0 -0
@@ -14,7 +14,7 @@ import logging
14
14
  from typing import Optional, Tuple, Union
15
15
 
16
16
  from nv_ingest_api.internal.schemas.message_brokers.response_schema import ResponseSchema
17
- from nv_ingest_api.util.service_clients.client_base import MessageBrokerClientBase, FetchMode
17
+ from nv_ingest_api.util.service_clients.client_base import MessageBrokerClientBase
18
18
 
19
19
  logger = logging.getLogger(__name__)
20
20
 
@@ -108,29 +108,23 @@ class SimpleClient(MessageBrokerClientBase):
108
108
  return self._handle_push(queue_name, message, timeout, for_nv_ingest)
109
109
 
110
110
  def fetch_message(
111
- self,
112
- queue_name: str,
113
- timeout: Optional[Tuple[int, Union[float]]] = (100, None),
114
- override_fetch_mode: FetchMode = None,
111
+ self, queue_name: str, timeout: Optional[Tuple[int, Union[float, None]]] = (1200, None)
115
112
  ) -> ResponseSchema:
116
113
  """
117
- Fetch a message from the specified queue.
114
+ Fetch a message from a specified queue.
118
115
 
119
116
  Parameters
120
117
  ----------
121
118
  queue_name : str
122
119
  The name of the queue.
123
- timeout : float, optional
124
- Timeout in seconds for the operation.
120
+ timeout : tuple, optional
121
+ A tuple containing the timeout value and an unused second element.
125
122
 
126
123
  Returns
127
124
  -------
128
125
  ResponseSchema
129
- The response containing the fetched message.
126
+ The response from the broker.
130
127
  """
131
- if isinstance(timeout, int):
132
- timeout = (timeout, None)
133
-
134
128
  return self._handle_pop(queue_name, timeout)
135
129
 
136
130
  def ping(self) -> ResponseSchema:
@@ -208,6 +202,7 @@ class SimpleClient(MessageBrokerClientBase):
208
202
 
209
203
  try:
210
204
  with socket.create_connection((self._host, self._port), timeout=self._connection_timeout) as sock:
205
+ sock.settimeout(self._connection_timeout)
211
206
  self._send(sock, json.dumps(command).encode("utf-8"))
212
207
  # Receive initial response with transaction ID
213
208
  response_data = self._recv(sock)
@@ -241,8 +236,9 @@ class SimpleClient(MessageBrokerClientBase):
241
236
 
242
237
  return ResponseSchema(**final_response)
243
238
 
244
- except (ConnectionError, socket.error, BrokenPipeError):
245
- pass
239
+ except (ConnectionError, socket.error, BrokenPipeError, socket.timeout) as e:
240
+ logger.debug(f"Connection error during PUSH: {e}")
241
+ pass # Will be retried
246
242
  except json.JSONDecodeError:
247
243
  return ResponseSchema(response_code=1, response_reason="Invalid JSON response from server.")
248
244
  except Exception as e:
@@ -272,61 +268,67 @@ class SimpleClient(MessageBrokerClientBase):
272
268
 
273
269
  command = {"command": "POP", "queue_name": queue_name}
274
270
 
275
- timeout = int(timeout[0])
271
+ timeout_val = timeout[0] if isinstance(timeout, tuple) else timeout
276
272
 
277
- if timeout is not None:
278
- command["timeout"] = timeout
273
+ if timeout_val is not None:
274
+ command["timeout"] = timeout_val
279
275
 
280
276
  start_time = time.time()
277
+ backoff_delay = 1 # Start with a 1-second backoff
278
+
281
279
  while True:
282
280
  elapsed = time.time() - start_time
283
- remaining_timeout = timeout - elapsed if timeout else None
284
- if remaining_timeout is not None and remaining_timeout <= 0:
285
- return ResponseSchema(response_code=1, response_reason="POP operation timed out.")
281
+ if timeout_val is not None and elapsed >= timeout_val:
282
+ return ResponseSchema(response_code=2, response_reason="Job not ready.")
286
283
 
287
284
  try:
288
285
  with socket.create_connection((self._host, self._port), timeout=self._connection_timeout) as sock:
286
+ sock.settimeout(self._connection_timeout)
289
287
  self._send(sock, json.dumps(command).encode("utf-8"))
290
288
  # Receive initial response with transaction ID and message
291
289
  response_data = self._recv(sock)
292
290
  response = json.loads(response_data)
293
291
 
294
- if response.get("response_code") != 0:
295
- if response.get("response_reason") == "Queue is empty":
296
- time.sleep(0.1)
297
- continue
298
- else:
299
- return ResponseSchema(**response)
300
-
301
- if "transaction_id" not in response:
302
- error_msg = "No transaction_id in response."
292
+ # The broker now returns a response_code of 2 for a timeout, which the high-level
293
+ # client should handle as a retryable event.
294
+ if response.get("response_code") == 2:
295
+ # Queue is empty or job not ready, continue to backoff and retry
296
+ pass
297
+ elif response.get("response_code") != 0:
298
+ return ResponseSchema(**response)
299
+ else:
300
+ # Success case: we received a message.
301
+ if "transaction_id" not in response:
302
+ return ResponseSchema(response_code=1, response_reason="No transaction_id in response.")
303
303
 
304
- return ResponseSchema(response_code=1, response_reason=error_msg)
304
+ transaction_id = response["transaction_id"]
305
+ message = response.get("response")
305
306
 
306
- transaction_id = response["transaction_id"]
307
- message = response.get("response")
307
+ # Send ACK
308
+ ack_data = json.dumps({"transaction_id": transaction_id, "ack": True}).encode("utf-8")
309
+ self._send(sock, ack_data)
308
310
 
309
- # Send ACK
310
- ack_data = json.dumps({"transaction_id": transaction_id, "ack": True}).encode("utf-8")
311
- self._send(sock, ack_data)
311
+ # Receive final response
312
+ final_response_data = self._recv(sock)
313
+ final_response = json.loads(final_response_data)
312
314
 
313
- # Receive final response
314
- final_response_data = self._recv(sock)
315
- final_response = json.loads(final_response_data)
316
-
317
- if final_response.get("response_code") == 0:
318
- return ResponseSchema(response_code=0, response=message, transaction_id=transaction_id)
319
- else:
320
- return ResponseSchema(**final_response)
315
+ if final_response.get("response_code") == 0:
316
+ return ResponseSchema(response_code=0, response=message, transaction_id=transaction_id)
317
+ else:
318
+ return ResponseSchema(**final_response)
321
319
 
322
- except (ConnectionError, socket.error, BrokenPipeError):
323
- pass
320
+ except (ConnectionError, socket.error, BrokenPipeError, socket.timeout) as e:
321
+ # Let the high-level client handle connection errors as retryable.
322
+ logger.debug(f"Connection error during POP: {e}, will retry after backoff.")
323
+ pass # Fall through to backoff and retry
324
324
  except json.JSONDecodeError:
325
325
  return ResponseSchema(response_code=1, response_reason="Invalid JSON response from server.")
326
326
  except Exception as e:
327
327
  return ResponseSchema(response_code=1, response_reason=str(e))
328
328
 
329
- time.sleep(0.1) # Backoff delay before retry
329
+ # Exponential backoff
330
+ time.sleep(backoff_delay)
331
+ backoff_delay = min(backoff_delay * 2, self._max_backoff)
330
332
 
331
333
  def _execute_simple_command(self, command: dict) -> ResponseSchema:
332
334
  """
@@ -350,12 +352,13 @@ class SimpleClient(MessageBrokerClientBase):
350
352
 
351
353
  try:
352
354
  with socket.create_connection((self._host, self._port), timeout=self._connection_timeout) as sock:
355
+ sock.settimeout(self._connection_timeout)
353
356
  self._send(sock, data)
354
357
  response_data = self._recv(sock)
355
358
  response = json.loads(response_data)
356
359
  return ResponseSchema(**response)
357
- except (ConnectionError, socket.error, BrokenPipeError) as e:
358
- return ResponseSchema(response_code=1, response_reason=f"Connection error: {e}")
360
+ except (ConnectionError, socket.error, BrokenPipeError, socket.timeout) as e:
361
+ return ResponseSchema(response_code=2, response_reason=f"Connection error: {e}")
359
362
  except json.JSONDecodeError:
360
363
  return ResponseSchema(response_code=1, response_reason="Invalid JSON response from server.")
361
364
  except Exception as e:
@@ -201,6 +201,8 @@ def construct_image_metadata_from_base64(
201
201
  page_count: int,
202
202
  source_metadata: Dict[str, Any],
203
203
  base_unified_metadata: Dict[str, Any],
204
+ subtype: None | ContentTypeEnum | str = "",
205
+ text: str = "",
204
206
  ) -> List[Any]:
205
207
  """
206
208
  Extracts image data from a base64-encoded image string, decodes the image to get
@@ -252,6 +254,7 @@ def construct_image_metadata_from_base64(
252
254
  "line": -1,
253
255
  "span": -1,
254
256
  },
257
+ "subtype": subtype or "",
255
258
  }
256
259
 
257
260
  # Construct image metadata
@@ -259,7 +262,7 @@ def construct_image_metadata_from_base64(
259
262
  "image_type": DocumentTypeEnum.PNG,
260
263
  "structured_image_type": ContentTypeEnum.UNKNOWN,
261
264
  "caption": "",
262
- "text": "",
265
+ "text": text,
263
266
  "image_location": bbox,
264
267
  "image_location_max_dimensions": (width, height),
265
268
  "height": height,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.7.15.dev20250715
3
+ Version: 2025.7.17.dev20250717
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -1,13 +1,13 @@
1
1
  nv_ingest_api/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
2
2
  nv_ingest_api/interface/__init__.py,sha256=ltWlfmtCewHSRK4B7DF__QvlSUPuliz58JEcEIeIgI0,10134
3
- nv_ingest_api/interface/extract.py,sha256=GyBfXKKTGwSb-y0k0nMiTf4HcCT2E-lxLY4aMYAPeOI,38815
3
+ nv_ingest_api/interface/extract.py,sha256=o9OdoWxYsj-O4HsDe6wWbyd69OAueb2rlMtKSzOrKZo,38743
4
4
  nv_ingest_api/interface/mutate.py,sha256=eZkd3sbHEJQiEPJyMbhewlPxQNMnL_Xur15icclnb-U,5934
5
5
  nv_ingest_api/interface/store.py,sha256=aR3Cf19lq9Yo9AHlAy1VVcrOP2dgyN01yYhwxyTprkQ,8207
6
6
  nv_ingest_api/interface/transform.py,sha256=g6YnFR7TpEU0xNtzCvv6kqnFbuCwQ6vRMjjBxz3G4n4,15815
7
7
  nv_ingest_api/interface/utility.py,sha256=AL4l0cJNvTjG1MAe1YNTk1jbbPED3g4HCewzx6Ffcio,7296
8
8
  nv_ingest_api/internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  nv_ingest_api/internal/enums/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
10
- nv_ingest_api/internal/enums/common.py,sha256=HSj7qqNr6KXu_FIyK_Wvel24R-r8lV7dLA173z5XFBc,12321
10
+ nv_ingest_api/internal/enums/common.py,sha256=lzDJ35VWfIwlL_Lx_q0dfHUuwEB7CXudHIQAilpjoRw,12611
11
11
  nv_ingest_api/internal/extract/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
12
12
  nv_ingest_api/internal/extract/audio/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
13
13
  nv_ingest_api/internal/extract/audio/audio_extraction.py,sha256=_jf_UC_FTqZr-xEpwG8edwBzdDjM01gGhqm9ulOsDcY,6973
@@ -20,10 +20,10 @@ nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py,sha
20
20
  nv_ingest_api/internal/extract/html/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
21
21
  nv_ingest_api/internal/extract/html/html_extractor.py,sha256=I9oWfj6_As4898GDDh0zsSuKxO3lBsvyYzhvUotjzJI,3282
22
22
  nv_ingest_api/internal/extract/image/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
23
- nv_ingest_api/internal/extract/image/chart_extractor.py,sha256=CkaW8ihPmGMQGrZh0ih14gtEpWuGOJ8InPQfZwpsP2g,13300
23
+ nv_ingest_api/internal/extract/image/chart_extractor.py,sha256=KLCSkLUczIlaqIQxyXzKQnwm-HQqTJKMPafnTobOEQs,13873
24
24
  nv_ingest_api/internal/extract/image/image_extractor.py,sha256=4tUWinuFMN3ukWa2tZa2_LtzRiTyUAUCBF6BDkUEvm0,8705
25
- nv_ingest_api/internal/extract/image/infographic_extractor.py,sha256=yc9b2q_Ea08CEVclZ47UkpU4F7qlakPuU3UV9P013W0,8903
26
- nv_ingest_api/internal/extract/image/table_extractor.py,sha256=ivHaJxYjeHvFM1PZIpxVabPadxtcTsu51j398ZjMhD4,13123
25
+ nv_ingest_api/internal/extract/image/infographic_extractor.py,sha256=i7zt_ow1gytU4hK2JCRg7T1wlbokaeuUpXX69LIQkzY,9687
26
+ nv_ingest_api/internal/extract/image/table_extractor.py,sha256=djTRYgGpwhqWBaVFOerh8J6kVH-xGbUtIelcOFecx4o,13641
27
27
  nv_ingest_api/internal/extract/image/image_helpers/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
28
28
  nv_ingest_api/internal/extract/image/image_helpers/common.py,sha256=P8rcl4YPyeWeMJg7u1yejD3k9EnDVEbJgfYEnJ4WO5c,15025
29
29
  nv_ingest_api/internal/extract/pdf/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
@@ -31,11 +31,11 @@ nv_ingest_api/internal/extract/pdf/pdf_extractor.py,sha256=CxtWaD6mql9MEqSdk2CfS
31
31
  nv_ingest_api/internal/extract/pdf/engines/__init__.py,sha256=u4GnAZmDKRl0RwYGIRiozIRw70Kybw3A72-lcKFeoTI,582
32
32
  nv_ingest_api/internal/extract/pdf/engines/adobe.py,sha256=VT0dEqkU-y2uGkaCqxtKYov_Q8R1028UQVBchgMLca4,17466
33
33
  nv_ingest_api/internal/extract/pdf/engines/llama.py,sha256=PpKTqS8jGHBV6mKLGZWwjpfT8ga6Fy8ffrvL-gPAf2c,8182
34
- nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py,sha256=XNYz4S2tMFBv0KFzXNERrVs-1raxJ_iIIXpBGlJFcD0,22987
35
- nv_ingest_api/internal/extract/pdf/engines/pdfium.py,sha256=vtdBue1EEQJsHcBuX3NdPutbLfyKPIzily6JOK6yV0w,22421
34
+ nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py,sha256=IVbNcH_phMiRSxnkZ04pGfQrPJ-x1zVR3hXyhxv7juc,22977
35
+ nv_ingest_api/internal/extract/pdf/engines/pdfium.py,sha256=hUqLzQgCJOZIVXrP0JLH4jXLKDHQkXmLXh1Nc4KI3nI,23494
36
36
  nv_ingest_api/internal/extract/pdf/engines/tika.py,sha256=6GyR2l6EsgNZl9jnYDXLeKNK9Fj2Mw9y2UWDq-eSkOc,3169
37
37
  nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py,sha256=jrv2B4VZAH4PevAQrFz965qz8UyXq3rViiOTbGLejec,14908
38
- nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py,sha256=Jk3wrQ2CZs167juvEZ-uV6qXWQjR08hhIu8otk2MWj4,4931
38
+ nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py,sha256=4bvN6LsPksLicI6jM0JqbJFiOZNHEcuc8MVVW4XfgV8,5875
39
39
  nv_ingest_api/internal/extract/pptx/__init__.py,sha256=HIHfzSig66GT0Uk8qsGBm_f13fKYcPtItBicRUWOOVA,183
40
40
  nv_ingest_api/internal/extract/pptx/pptx_extractor.py,sha256=o-0P2dDyRFW37uQi_lKk6-eFozTcZvbq-2Y4I0EBMIY,7749
41
41
  nv_ingest_api/internal/extract/pptx/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -48,15 +48,15 @@ nv_ingest_api/internal/primitives/control_message_task.py,sha256=nWVB3QsP6p8BKwH
48
48
  nv_ingest_api/internal/primitives/ingest_control_message.py,sha256=rvipBiiUaHuRhupFCFDCG8rv0PylSJibCiJ7rDeb98A,8514
49
49
  nv_ingest_api/internal/primitives/nim/__init__.py,sha256=i_i_fBR2EcRCh2Y19DF6GM3s_Q0VPgo_thPnhEIJUyg,266
50
50
  nv_ingest_api/internal/primitives/nim/default_values.py,sha256=W92XjfyeC6uuVxut6J7p00x1kpNsnXIDb97gSVytZJk,380
51
- nv_ingest_api/internal/primitives/nim/nim_client.py,sha256=pvNxwMgNS7lFjXvLgRVUrL6FdXDGkfGnOGQh9WrpYzs,14858
51
+ nv_ingest_api/internal/primitives/nim/nim_client.py,sha256=rtzqwHpYsEPuzL7aGIdke5P3_Gu1Z8MbxKDIuJA-L8I,16336
52
52
  nv_ingest_api/internal/primitives/nim/nim_model_interface.py,sha256=wMEgoi79YQn_4338MVemkeZgM1J-vnz0aZWpvqDhib4,2392
53
53
  nv_ingest_api/internal/primitives/nim/model_interface/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
54
54
  nv_ingest_api/internal/primitives/nim/model_interface/cached.py,sha256=b1HX-PY1ExW5V6pXC1ZiHdobeG_BmbPr3rBbVJef13s,11003
55
55
  nv_ingest_api/internal/primitives/nim/model_interface/decorators.py,sha256=qwubkHs4WjnexM6rI0wkjWCsrVNEbA4Wjk2oKL9OYCU,1499
56
56
  nv_ingest_api/internal/primitives/nim/model_interface/deplot.py,sha256=TvKdk6PTuI1WNhRmNNrvygaI_DIutkJkDL-XdtLZQac,10787
57
- nv_ingest_api/internal/primitives/nim/model_interface/helpers.py,sha256=x35a9AyTYxpESQflLo_YnhVOKblQKVen6vGGFaXmNiE,9927
57
+ nv_ingest_api/internal/primitives/nim/model_interface/helpers.py,sha256=wgcwZJnIn3gKsqe4qhUa9V0gYp3NkIFV8R1qW7Zag1w,11571
58
58
  nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py,sha256=WysjDZeegclO3mZgVcGOwzWbr8wSI4pWRiYD4iC2EXo,7098
59
- nv_ingest_api/internal/primitives/nim/model_interface/paddle.py,sha256=rSUPwl5XOrqneoS6aKhatVjrNBg_LhP3nwUWS_aTwz0,17950
59
+ nv_ingest_api/internal/primitives/nim/model_interface/ocr.py,sha256=Vhim3py_rc5jA0BoKubwfekEqOwxUUePzcmc59pRuOk,21458
60
60
  nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py,sha256=5PqD2JuHY2rwd-6SSB4axr2Dd79vm95sAEkcmI3U7ME,12977
61
61
  nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py,sha256=lFhppNqrq5X_fzbCWKphvZQMzaJd3gHrkWsyJORzFrU,5010
62
62
  nv_ingest_api/internal/primitives/nim/model_interface/vlm.py,sha256=qJ382PU1ZrIM-SR3cqIhtY_W2rmHec2HIa2aUB2SvaU,6031
@@ -68,21 +68,21 @@ nv_ingest_api/internal/primitives/tracing/tagging.py,sha256=O5dD7Z7j43nrjqn0Axhx
68
68
  nv_ingest_api/internal/schemas/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
69
69
  nv_ingest_api/internal/schemas/extract/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
70
70
  nv_ingest_api/internal/schemas/extract/extract_audio_schema.py,sha256=W-nEBriqiNkjpaQ5AT_8LhtVXlW8AhlcftmoeQQtKAs,3812
71
- nv_ingest_api/internal/schemas/extract/extract_chart_schema.py,sha256=iu8lHQC0zbBB9VRK7PZisAVzpeSpFqjcXRAnwZ9OzoM,4301
71
+ nv_ingest_api/internal/schemas/extract/extract_chart_schema.py,sha256=wDcvQ5XtOjIBGSWtNjQPiPtVKrSOYqbf2mnLrhfhue4,4283
72
72
  nv_ingest_api/internal/schemas/extract/extract_docx_schema.py,sha256=M2N7WjMNvSemHcJHWeNUD_kFG0wC5VE2W3K6SVrJqvA,3761
73
73
  nv_ingest_api/internal/schemas/extract/extract_html_schema.py,sha256=lazpONTGZ6Fl420BGBAr6rogFGtlzBiZTc1uA694OIs,841
74
74
  nv_ingest_api/internal/schemas/extract/extract_image_schema.py,sha256=GC4xV8Z9TPLOuxlEtf2fbklSSp8ETGMrDpZgMQ02UwA,3766
75
- nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py,sha256=rl_hFDoJaJLTKbtnEpDSBj-73KQL9aUEVKGiW0IdXiU,3991
75
+ nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py,sha256=z42cs7w-U-IUCMGByp5e_iBUZ7KCl5vTIXkP64ty6gY,3973
76
76
  nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py,sha256=G9g1lEORmryUWTzDyZ0vHAuPnVMK7VaRx0E4xzmAw3Q,6589
77
77
  nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py,sha256=5dT0kv-Mmpe5KW-BZc1JOW3rUlgzVZI0rpB79NWytmw,3761
78
- nv_ingest_api/internal/schemas/extract/extract_table_schema.py,sha256=sbt3TvQrLsXc8-muKnsyOs4MfpA4VzrprYHdu1IrY8M,3950
78
+ nv_ingest_api/internal/schemas/extract/extract_table_schema.py,sha256=vd_1mf_LmQGvSTpQCuWr6ubsiav4TMhp_SpKGO-6RLc,3935
79
79
  nv_ingest_api/internal/schemas/message_brokers/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
80
80
  nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py,sha256=4xTSFE_vH7yZE9RRJRflFAG9hNXIaF6K020M_xA7ylw,1351
81
81
  nv_ingest_api/internal/schemas/message_brokers/request_schema.py,sha256=LZX_wXDxTamVFqTQs2Yd8uvWyPE5mddHAWSU4PtfEIQ,966
82
82
  nv_ingest_api/internal/schemas/message_brokers/response_schema.py,sha256=4b275HlzBSzpmuE2wdoeaGKPCdKki3wuWldtRIfrj8w,727
83
83
  nv_ingest_api/internal/schemas/meta/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
84
84
  nv_ingest_api/internal/schemas/meta/base_model_noext.py,sha256=8hXU1uuiqZ6t8EsoZ8vlC5EFf2zSZrKEX133FcfZMwI,316
85
- nv_ingest_api/internal/schemas/meta/ingest_job_schema.py,sha256=szDvgc2A_JetD2Jyewyl4ac4lwpy3NiLxD9dOYz42sM,8116
85
+ nv_ingest_api/internal/schemas/meta/ingest_job_schema.py,sha256=ceYQjRjhBSDbbZ6q-Db7Y6GHVOvWPdGAMb3TX1vMWfY,8321
86
86
  nv_ingest_api/internal/schemas/meta/metadata_schema.py,sha256=VnAzkSFat_ckI19mlwQTlFrvP6EZVCwyNl9bt51b8oU,7193
87
87
  nv_ingest_api/internal/schemas/mutate/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
88
88
  nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py,sha256=k1JOdlPPpsipc0XhHf-9YxJ_-W0HvpVE1ZhYmr7fzj0,395
@@ -92,15 +92,15 @@ nv_ingest_api/internal/schemas/store/store_image_schema.py,sha256=p2LGij9i6sG6RY
92
92
  nv_ingest_api/internal/schemas/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
93
93
  nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py,sha256=OtM1iPw26uioC3mghbOJQurKGg641uQfhASH462VqOY,578
94
94
  nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py,sha256=31ThI5fr0yyENeJeE1xMAA-pxk1QVJLwM842zMate_k,429
95
- nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py,sha256=ongmHkJA2953f9_RI7ZYzf5BUnFzVL6Al5E8WKyfgw4,885
95
+ nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py,sha256=RZCISA8CUqKiY8eJuk4uWxzo4PZ-fuYdzMO7_LYFkoM,1117
96
96
  nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py,sha256=D9K8tvu-tkEBQkZo7uuRzgrHdGyM3ZcNycHbHy5HV2E,791
97
97
  nv_ingest_api/internal/store/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
98
98
  nv_ingest_api/internal/store/embed_text_upload.py,sha256=maxb4FPsBvWgvlrjAPEBlRZEFdJX5NxPG-p8kUbzV7I,9898
99
99
  nv_ingest_api/internal/store/image_upload.py,sha256=GNlY4k3pfcHv3lzXxkbmGLeHFsf9PI25bkBn6Xn9h3I,9654
100
100
  nv_ingest_api/internal/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
101
101
  nv_ingest_api/internal/transform/caption_image.py,sha256=0ILCG2F8ESqKtZiPUM-6F1BHUflFZ76Dzi2GNzkE-lU,8517
102
- nv_ingest_api/internal/transform/embed_text.py,sha256=A8JMotTkC8KQ0pmz4AIJhaKebza6JzhQ0aEnHX2oHY8,16539
103
- nv_ingest_api/internal/transform/split_text.py,sha256=-kwpRWSVZrPldm1hn3-tVz_TkzuKM-kPvNU3HTp9zOY,7476
102
+ nv_ingest_api/internal/transform/embed_text.py,sha256=kvVGlNH1S91UENXWLD31uh3KzlfJYOlYitpIFMsyowU,20033
103
+ nv_ingest_api/internal/transform/split_text.py,sha256=LAtInGVuydH43UwjNMQWFVC1A6NdhXP_dZup2xX4qEo,7745
104
104
  nv_ingest_api/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
105
105
  nv_ingest_api/util/control_message/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
106
106
  nv_ingest_api/util/control_message/validators.py,sha256=KvvbyheJ5rbzvJbH9JKpMR9VfoI0b0uM6eTAZte1p44,1315
@@ -122,8 +122,8 @@ nv_ingest_api/util/exception_handlers/schemas.py,sha256=NJngVNf9sk5Uz6CFFfkNO_LB
122
122
  nv_ingest_api/util/image_processing/__init__.py,sha256=Jiy8C1ZuSrNb_eBM1ZTV9IKFIsnjhZi6Ku3JJhVLimA,104
123
123
  nv_ingest_api/util/image_processing/clustering.py,sha256=sUGlZI4cx1q8h4Pns1N9JVpdfSM2BOH8zRmn9QFCtzI,9236
124
124
  nv_ingest_api/util/image_processing/processing.py,sha256=LSoDDEmahr7a-qSS12McVcowRe3dOrAZwa1h-PD_JPQ,6554
125
- nv_ingest_api/util/image_processing/table_and_chart.py,sha256=bxOu9PZYkG_WFCDGw_JLaO60S2pDSN8EOWK3xkIwr2A,14376
126
- nv_ingest_api/util/image_processing/transforms.py,sha256=CJVGQgUvHk_mzihR8ZZrvwJUBgUYcgFAKzXyRTmKdCE,23371
125
+ nv_ingest_api/util/image_processing/table_and_chart.py,sha256=idCIjiLkY-usI2EARchg3omWLtIYmYA-1tdUUV2lbno,16338
126
+ nv_ingest_api/util/image_processing/transforms.py,sha256=FBcORrvjimn3c1naaVxRMm6PMJ2Dt6Uy9AZRUxdbkR0,23829
127
127
  nv_ingest_api/util/imports/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
128
128
  nv_ingest_api/util/imports/callable_signatures.py,sha256=e2bJB1pmkN4Ee-Bf-VggOSBaQ4RXofWF5eKkWXgIj2U,1855
129
129
  nv_ingest_api/util/imports/dynamic_resolvers.py,sha256=7GByV_-8z2X0tnVoabCxVioxOP3sYMros3ZllVAW-wY,4343
@@ -131,11 +131,11 @@ nv_ingest_api/util/logging/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
131
131
  nv_ingest_api/util/logging/configuration.py,sha256=XUo7yON9V8IDPfN3x8RBwpZ3Gv4zrRq8QwsByf4dGNE,981
132
132
  nv_ingest_api/util/message_brokers/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
133
133
  nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py,sha256=WaQ3CWIpIKWEivT5kL-bkmzcSQKLGFNFHdXHUJjqZFs,325
134
- nv_ingest_api/util/message_brokers/simple_message_broker/broker.py,sha256=h9Q4q_alXGxCLNlJUZPan46q8fJ7B72sQy2eBfHdk6I,17265
134
+ nv_ingest_api/util/message_brokers/simple_message_broker/broker.py,sha256=PekxaxVcAa9k1wgUtozlr04SW3sAeqYJE-wdVBZf9eo,17264
135
135
  nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py,sha256=3p-LRqG8qLnsfEhBNf73_DG22C08JKahTqUvPLS2Apg,2554
136
- nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py,sha256=fh7Q0wO5H_FtrHV1VdT6V66aZNqglOh_2XdkfLt8hgg,15722
136
+ nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py,sha256=CCRAbq2EBH2quX9UTfuBbz3tTMDnWqhEF33roFwbyuk,16484
137
137
  nv_ingest_api/util/metadata/__init__.py,sha256=HIHfzSig66GT0Uk8qsGBm_f13fKYcPtItBicRUWOOVA,183
138
- nv_ingest_api/util/metadata/aggregators.py,sha256=Y5JSKuLhhk_ldpzT3eRIcVg7QM7cTNhfQZn4g5bcbq4,15884
138
+ nv_ingest_api/util/metadata/aggregators.py,sha256=YYdvJ1E04eGFZKKHUxXoH6mzLg8nor9Smvnv0qzqK5w,15988
139
139
  nv_ingest_api/util/multi_processing/__init__.py,sha256=4fojP8Rp_5Hu1YAkqGylqTyEZ-HBVVEunn5Z9I99swA,242
140
140
  nv_ingest_api/util/multi_processing/mp_pool_singleton.py,sha256=dTfP82DgGPaXEJH3jywTO8rNlLZUniD4FFzwv84_giE,7372
141
141
  nv_ingest_api/util/nim/__init__.py,sha256=UqbiXFCqjWcjNvoduXd_0gOUOGBT8JvppiYHOmMyneA,1775
@@ -153,8 +153,8 @@ nv_ingest_api/util/service_clients/rest/rest_client.py,sha256=dZ-jrk7IK7oNtHoXFS
153
153
  nv_ingest_api/util/string_processing/__init__.py,sha256=mkwHthyS-IILcLcL1tJYeF6mpqX3pxEw5aUzDGjTSeU,1411
154
154
  nv_ingest_api/util/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
155
155
  nv_ingest_api/util/system/hardware_info.py,sha256=ORZeKpH9kSGU_vuPhyBwkIiMyCViKUX2CP__MCjrfbU,19463
156
- nv_ingest_api-2025.7.15.dev20250715.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
157
- nv_ingest_api-2025.7.15.dev20250715.dist-info/METADATA,sha256=OWZyeCR9DZ23SdT0RcMdodCkxR508CZZaVczdM3qXPE,13947
158
- nv_ingest_api-2025.7.15.dev20250715.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
159
- nv_ingest_api-2025.7.15.dev20250715.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
160
- nv_ingest_api-2025.7.15.dev20250715.dist-info/RECORD,,
156
+ nv_ingest_api-2025.7.17.dev20250717.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
157
+ nv_ingest_api-2025.7.17.dev20250717.dist-info/METADATA,sha256=NkhXN1NC8IKy8OWMV5uJHFdcHY8BOj0dQ2IwnvidETk,13947
158
+ nv_ingest_api-2025.7.17.dev20250717.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
159
+ nv_ingest_api-2025.7.17.dev20250717.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
160
+ nv_ingest_api-2025.7.17.dev20250717.dist-info/RECORD,,