nv-ingest-api 2025.4.17.dev20250417__py3-none-any.whl → 2025.4.19.dev20250419__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. Click here for more details.
- nv_ingest_api/__init__.py +0 -3
- nv_ingest_api/{internal/primitives → primitives}/control_message_task.py +0 -4
- nv_ingest_api/{internal/primitives → primitives}/ingest_control_message.py +2 -5
- {nv_ingest_api-2025.4.17.dev20250417.dist-info → nv_ingest_api-2025.4.19.dev20250419.dist-info}/METADATA +1 -1
- nv_ingest_api-2025.4.19.dev20250419.dist-info/RECORD +9 -0
- {nv_ingest_api-2025.4.17.dev20250417.dist-info → nv_ingest_api-2025.4.19.dev20250419.dist-info}/WHEEL +1 -1
- nv_ingest_api/interface/__init__.py +0 -215
- nv_ingest_api/interface/extract.py +0 -972
- nv_ingest_api/interface/mutate.py +0 -154
- nv_ingest_api/interface/store.py +0 -218
- nv_ingest_api/interface/transform.py +0 -382
- nv_ingest_api/interface/utility.py +0 -200
- nv_ingest_api/internal/enums/__init__.py +0 -3
- nv_ingest_api/internal/enums/common.py +0 -494
- nv_ingest_api/internal/extract/__init__.py +0 -3
- nv_ingest_api/internal/extract/audio/__init__.py +0 -3
- nv_ingest_api/internal/extract/audio/audio_extraction.py +0 -149
- nv_ingest_api/internal/extract/docx/__init__.py +0 -5
- nv_ingest_api/internal/extract/docx/docx_extractor.py +0 -205
- nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
- nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -3
- nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -122
- nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -895
- nv_ingest_api/internal/extract/image/__init__.py +0 -3
- nv_ingest_api/internal/extract/image/chart_extractor.py +0 -353
- nv_ingest_api/internal/extract/image/image_extractor.py +0 -204
- nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -3
- nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -403
- nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -253
- nv_ingest_api/internal/extract/image/table_extractor.py +0 -344
- nv_ingest_api/internal/extract/pdf/__init__.py +0 -3
- nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -19
- nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -484
- nv_ingest_api/internal/extract/pdf/engines/llama.py +0 -243
- nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +0 -597
- nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +0 -146
- nv_ingest_api/internal/extract/pdf/engines/pdfium.py +0 -603
- nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -96
- nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -426
- nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -74
- nv_ingest_api/internal/extract/pptx/__init__.py +0 -5
- nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
- nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +0 -799
- nv_ingest_api/internal/extract/pptx/pptx_extractor.py +0 -187
- nv_ingest_api/internal/mutate/__init__.py +0 -3
- nv_ingest_api/internal/mutate/deduplicate.py +0 -110
- nv_ingest_api/internal/mutate/filter.py +0 -133
- nv_ingest_api/internal/primitives/__init__.py +0 -0
- nv_ingest_api/internal/primitives/nim/__init__.py +0 -8
- nv_ingest_api/internal/primitives/nim/default_values.py +0 -15
- nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -3
- nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -274
- nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -56
- nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -270
- nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -275
- nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +0 -238
- nv_ingest_api/internal/primitives/nim/model_interface/paddle.py +0 -462
- nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +0 -367
- nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +0 -132
- nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -152
- nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +0 -1400
- nv_ingest_api/internal/primitives/nim/nim_client.py +0 -344
- nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -81
- nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
- nv_ingest_api/internal/primitives/tracing/latency.py +0 -69
- nv_ingest_api/internal/primitives/tracing/logging.py +0 -96
- nv_ingest_api/internal/primitives/tracing/tagging.py +0 -197
- nv_ingest_api/internal/schemas/__init__.py +0 -3
- nv_ingest_api/internal/schemas/extract/__init__.py +0 -3
- nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +0 -130
- nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +0 -135
- nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +0 -124
- nv_ingest_api/internal/schemas/extract/extract_image_schema.py +0 -124
- nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +0 -128
- nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +0 -218
- nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +0 -124
- nv_ingest_api/internal/schemas/extract/extract_table_schema.py +0 -129
- nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -3
- nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -23
- nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -34
- nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -19
- nv_ingest_api/internal/schemas/meta/__init__.py +0 -3
- nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -11
- nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +0 -237
- nv_ingest_api/internal/schemas/meta/metadata_schema.py +0 -221
- nv_ingest_api/internal/schemas/mutate/__init__.py +0 -3
- nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -16
- nv_ingest_api/internal/schemas/store/__init__.py +0 -3
- nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -28
- nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -30
- nv_ingest_api/internal/schemas/transform/__init__.py +0 -3
- nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +0 -15
- nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -17
- nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +0 -25
- nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +0 -22
- nv_ingest_api/internal/store/__init__.py +0 -3
- nv_ingest_api/internal/store/embed_text_upload.py +0 -236
- nv_ingest_api/internal/store/image_upload.py +0 -232
- nv_ingest_api/internal/transform/__init__.py +0 -3
- nv_ingest_api/internal/transform/caption_image.py +0 -205
- nv_ingest_api/internal/transform/embed_text.py +0 -496
- nv_ingest_api/internal/transform/split_text.py +0 -157
- nv_ingest_api/util/__init__.py +0 -0
- nv_ingest_api/util/control_message/__init__.py +0 -0
- nv_ingest_api/util/control_message/validators.py +0 -47
- nv_ingest_api/util/converters/__init__.py +0 -0
- nv_ingest_api/util/converters/bytetools.py +0 -78
- nv_ingest_api/util/converters/containers.py +0 -65
- nv_ingest_api/util/converters/datetools.py +0 -90
- nv_ingest_api/util/converters/dftools.py +0 -127
- nv_ingest_api/util/converters/formats.py +0 -64
- nv_ingest_api/util/converters/type_mappings.py +0 -27
- nv_ingest_api/util/detectors/__init__.py +0 -5
- nv_ingest_api/util/detectors/language.py +0 -38
- nv_ingest_api/util/exception_handlers/__init__.py +0 -0
- nv_ingest_api/util/exception_handlers/converters.py +0 -72
- nv_ingest_api/util/exception_handlers/decorators.py +0 -223
- nv_ingest_api/util/exception_handlers/detectors.py +0 -74
- nv_ingest_api/util/exception_handlers/pdf.py +0 -116
- nv_ingest_api/util/exception_handlers/schemas.py +0 -68
- nv_ingest_api/util/image_processing/__init__.py +0 -5
- nv_ingest_api/util/image_processing/clustering.py +0 -260
- nv_ingest_api/util/image_processing/processing.py +0 -179
- nv_ingest_api/util/image_processing/table_and_chart.py +0 -449
- nv_ingest_api/util/image_processing/transforms.py +0 -407
- nv_ingest_api/util/logging/__init__.py +0 -0
- nv_ingest_api/util/logging/configuration.py +0 -31
- nv_ingest_api/util/message_brokers/__init__.py +0 -3
- nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -9
- nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -465
- nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -71
- nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +0 -435
- nv_ingest_api/util/metadata/__init__.py +0 -5
- nv_ingest_api/util/metadata/aggregators.py +0 -469
- nv_ingest_api/util/multi_processing/__init__.py +0 -8
- nv_ingest_api/util/multi_processing/mp_pool_singleton.py +0 -194
- nv_ingest_api/util/nim/__init__.py +0 -56
- nv_ingest_api/util/pdf/__init__.py +0 -3
- nv_ingest_api/util/pdf/pdfium.py +0 -427
- nv_ingest_api/util/schema/__init__.py +0 -0
- nv_ingest_api/util/schema/schema_validator.py +0 -10
- nv_ingest_api/util/service_clients/__init__.py +0 -3
- nv_ingest_api/util/service_clients/client_base.py +0 -72
- nv_ingest_api/util/service_clients/kafka/__init__.py +0 -3
- nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
- nv_ingest_api/util/service_clients/redis/redis_client.py +0 -334
- nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
- nv_ingest_api/util/service_clients/rest/rest_client.py +0 -398
- nv_ingest_api/util/string_processing/__init__.py +0 -51
- nv_ingest_api-2025.4.17.dev20250417.dist-info/RECORD +0 -152
- /nv_ingest_api/{internal → primitives}/__init__.py +0 -0
- {nv_ingest_api-2025.4.17.dev20250417.dist-info → nv_ingest_api-2025.4.19.dev20250419.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-2025.4.17.dev20250417.dist-info → nv_ingest_api-2025.4.19.dev20250419.dist-info}/top_level.txt +0 -0
|
@@ -1,398 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
-
# All rights reserved.
|
|
3
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
|
|
5
|
-
# pylint: skip-file
|
|
6
|
-
|
|
7
|
-
import logging
|
|
8
|
-
import re
|
|
9
|
-
import time
|
|
10
|
-
from typing import Any
|
|
11
|
-
|
|
12
|
-
import httpx
|
|
13
|
-
import requests
|
|
14
|
-
|
|
15
|
-
from nv_ingest_api.internal.schemas.message_brokers.response_schema import ResponseSchema
|
|
16
|
-
from nv_ingest_api.util.service_clients.client_base import MessageBrokerClientBase
|
|
17
|
-
|
|
18
|
-
logger = logging.getLogger(__name__)
|
|
19
|
-
|
|
20
|
-
# HTTP Response Statuses that result in marking submission as failed
|
|
21
|
-
# 4XX - Any 4XX status is considered a client derived error and will result in failure
|
|
22
|
-
# 5XX - Not all 500's are terminal but most are. Those which are listed below
|
|
23
|
-
_TERMINAL_RESPONSE_STATUSES = [
|
|
24
|
-
400,
|
|
25
|
-
401,
|
|
26
|
-
402,
|
|
27
|
-
403,
|
|
28
|
-
404,
|
|
29
|
-
405,
|
|
30
|
-
406,
|
|
31
|
-
407,
|
|
32
|
-
408,
|
|
33
|
-
409,
|
|
34
|
-
410,
|
|
35
|
-
411,
|
|
36
|
-
412,
|
|
37
|
-
413,
|
|
38
|
-
414,
|
|
39
|
-
415,
|
|
40
|
-
416,
|
|
41
|
-
417,
|
|
42
|
-
418,
|
|
43
|
-
421,
|
|
44
|
-
422,
|
|
45
|
-
423,
|
|
46
|
-
424,
|
|
47
|
-
425,
|
|
48
|
-
426,
|
|
49
|
-
428,
|
|
50
|
-
429,
|
|
51
|
-
431,
|
|
52
|
-
451,
|
|
53
|
-
500,
|
|
54
|
-
501,
|
|
55
|
-
503,
|
|
56
|
-
505,
|
|
57
|
-
506,
|
|
58
|
-
507,
|
|
59
|
-
508,
|
|
60
|
-
510,
|
|
61
|
-
511,
|
|
62
|
-
]
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
class RestClient(MessageBrokerClientBase):
    """
    A client for interfacing with the nv-ingest HTTP endpoint, providing mechanisms for sending and receiving messages
    with retry logic and connection management.

    Parameters
    ----------
    host : str
        The hostname of the HTTP server.
    port : int
        The port number of the HTTP server.
    max_retries : int, optional
        The maximum number of retry attempts for operations. Default is 0 (no retries).
    max_backoff : int, optional
        The maximum backoff delay between retries in seconds. Default is 32 seconds.
    connection_timeout : int, optional
        The timeout in seconds handed to ``requests.post`` when submitting a job. Default is 300 seconds.
    http_allocator : Any, optional
        Zero-argument callable used to build the HTTP client object. Default is ``httpx.AsyncClient``.
    **kwargs
        Recognised keys: ``base_url`` (used instead of ``host``/``port`` when truthy), ``headers`` (merged over the
        default ``Content-Type`` header on every request) and ``auth`` (forwarded to ``requests``).

    Attributes
    ----------
    _client : Any
        The HTTP client instance produced by ``http_allocator``.
    """

    def __init__(
        self,
        host: str,
        port: int,
        max_retries: int = 0,
        max_backoff: int = 32,
        connection_timeout: int = 300,
        http_allocator: Any = httpx.AsyncClient,
        **kwargs,
    ):
        self._host = host
        self._port = port
        self._max_retries = max_retries
        self._max_backoff = max_backoff
        self._connection_timeout = connection_timeout
        self._http_allocator = http_allocator
        self._client = self._http_allocator()
        self._retries = 0

        self._submit_endpoint = "/v1/submit_job"
        self._fetch_endpoint = "/v1/fetch_job"

        if "base_url" in kwargs:
            logger.debug("Using custom base_url; ignoring host and port")

        # A falsy base_url (missing, None or "") falls back to the host/port derived URL.
        self._base_url = kwargs.get("base_url") or self.generate_url(self._host, self._port)

        # Treat an explicit ``headers=None`` like a missing key; storing None would make
        # ``headers.update(self._headers)`` raise TypeError in fetch_message/submit_message.
        custom_headers = kwargs.get("headers")
        self._headers = {} if custom_headers is None else custom_headers
        self._auth = kwargs.get("auth", None)

    def _connect(self) -> None:
        """
        Pings the server and allocates a fresh client object when the ping does not report success.
        """
        ping_result = self.ping()

        if ping_result.response_code != 0:
            logger.debug("Reconnecting to HTTP server")
            self._client = self._http_allocator()

    @property
    def max_retries(self) -> int:
        """Maximum number of retry attempts allowed per operation."""
        return self._max_retries

    @max_retries.setter
    def max_retries(self, value: int) -> None:
        self._max_retries = value

    def get_client(self) -> Any:
        """
        Returns the HTTP client instance, reconnecting first when the client has been invalidated.

        Returns
        -------
        Any
            The HTTP client instance.
        """
        if self._client is None:
            self._connect()
        return self._client

    def ping(self) -> ResponseSchema:
        """
        Checks if the HTTP server is responsive.

        Returns
        -------
        ResponseSchema
            ``response_code=0`` when the client's ``ping()`` call succeeds; ``response_code=1`` with a failure
            reason when it raises ``httpx.HTTPError`` or ``AttributeError``.
        """
        try:
            # NOTE(review): this depends on the client object exposing ``ping()``. An invalidated client (None),
            # or a client type without that method, raises AttributeError and is reported as a failed ping.
            # The default ``httpx.AsyncClient`` does not appear to provide ``ping()`` -- confirm.
            self._client.ping()
            return ResponseSchema(response_code=0)
        except (httpx.HTTPError, AttributeError):
            return ResponseSchema(response_code=1, response_reason="Failed to ping HTTP server")

    @staticmethod
    def generate_url(user_provided_url, user_provided_port) -> str:
        """Builds the base URL from a host (or URL) and a port.

        If the value does not already start with ``http://`` or ``https://``, the default ``http://`` scheme
        is prepended. The port is appended as ``:<port>`` in both cases.

        Args:
            user_provided_url str: Endpoint where the Rest service is running
            user_provided_port: Port appended to the endpoint

        Returns:
            str: URL of the form ``<scheme>://<host>:<port>``
        """
        if not re.match(r"^https?://", user_provided_url):
            # Add the default `http://` if it's not already present in the URL
            user_provided_url = f"http://{user_provided_url}"
        return f"{user_provided_url}:{user_provided_port}"

    def fetch_message(self, job_id: str, timeout: Any = (10, 600)) -> ResponseSchema:
        """
        Fetches the result of a job from the fetch endpoint, handling streaming HTTP responses and retrying
        on non-terminal failures.

        Parameters
        ----------
        job_id : str
            The server-side job identifier.
        timeout : float or tuple of float
            Accepted for interface compatibility.
            NOTE(review): this value is currently not forwarded; the request always uses a fixed
            ``(30, 600)`` timeout. Confirm caller expectations before wiring it through.

        Returns
        -------
        ResponseSchema
            ``response_code=0`` with the decoded body on HTTP 200. ``response_code=1`` for terminal statuses,
            for HTTP 202 (job not ready yet), when retries are exhausted, or on an unexpected error.

        Raises
        ------
        TimeoutError
            Re-raised unchanged if the built-in ``TimeoutError`` occurs during the fetch.
        """
        retries = 0
        url = f"{self._base_url}{self._fetch_endpoint}/{job_id}"

        # Ensure headers are included; caller-supplied headers override the default
        headers = {"Content-Type": "application/json"}
        headers.update(self._headers)

        while True:
            try:
                logger.debug(f"Invoking fetch_message http endpoint @ '{url}'")

                # Fetch using streaming response
                with requests.get(
                    url,
                    timeout=(30, 600),
                    stream=True,
                    headers=headers,
                    auth=self._auth,
                ) as result:
                    response_code = result.status_code

                    if response_code in _TERMINAL_RESPONSE_STATUSES:
                        # Terminal response code; return error ResponseSchema
                        return ResponseSchema(
                            response_code=1,
                            response_reason=(
                                f"Terminal response code {response_code} received when fetching JobSpec: {job_id}"
                            ),
                            response=result.text,
                        )

                    if response_code == 200:
                        # Handle streaming response, reconstructing payload incrementally
                        response_chunks = []
                        for chunk in result.iter_content(chunk_size=1024 * 1024):  # 1MB chunks
                            if chunk:
                                response_chunks.append(chunk)
                        full_response = b"".join(response_chunks).decode("utf-8")

                        return ResponseSchema(
                            response_code=0,
                            response_reason="OK",
                            response=full_response,
                        )

                    elif response_code == 202:
                        # Job is not ready yet
                        return ResponseSchema(
                            response_code=1,
                            response_reason="Job is not ready yet. Retry later.",
                        )

                    else:
                        # Retry the operation. A RuntimeError raised once retries are exhausted is
                        # picked up by the generic ``except Exception`` handler below.
                        retries = self.perform_retry_backoff(retries)

            except (ConnectionError, requests.HTTPError, requests.exceptions.ConnectionError) as err:
                logger.error(f"Error during fetching, retrying... Error: {err}")
                self._client = None  # Invalidate client to force reconnection
                if "Connection refused" in str(err):
                    logger.debug(
                        "Connection refused encountered during fetch; sleeping for 10 seconds before retrying."
                    )
                    time.sleep(10)
                try:
                    retries = self.perform_retry_backoff(retries)
                except RuntimeError as rte:
                    # Max retries reached
                    return ResponseSchema(response_code=1, response_reason=str(rte), response=str(err))
            except TimeoutError:
                raise
            except Exception as e:
                # Handle non-http specific exceptions
                logger.error(f"Unexpected error during fetch from {url}: {e}")
                return ResponseSchema(
                    response_code=1, response_reason=f"Unexpected error during fetch: {e}", response=None
                )

    def submit_message(self, channel_name: str, message: str, for_nv_ingest: bool = False) -> ResponseSchema:
        """
        Submits a JobSpec to the submit endpoint with retries on failure.

        Parameters
        ----------
        channel_name : str
            Not used as part of RestClient but defined in MessageClientBase.
        message : str
            The message to submit; sent as the ``payload`` field of the JSON body.
        for_nv_ingest : bool
            Not used as part of RestClient but defined in MessageClientBase.

        Returns
        -------
        ResponseSchema
            ``response_code=0`` on HTTP 200, with the response text as both ``response`` and
            ``transaction_id`` and the ``x-trace-id`` header as ``trace_id``. ``response_code=1`` for terminal
            statuses, when retries are exhausted, or on an unexpected error.
        """
        retries = 0
        url = f"{self._base_url}{self._submit_endpoint}"

        # Ensure content-type is present; caller-supplied headers override the default
        headers = {"Content-Type": "application/json"}
        headers.update(self._headers)

        while True:
            try:
                # Submit via HTTP
                result = requests.post(
                    url,
                    json={"payload": message},
                    headers=headers,
                    auth=self._auth,
                    timeout=self._connection_timeout,
                )

                response_code = result.status_code
                if response_code in _TERMINAL_RESPONSE_STATUSES:
                    # Terminal response code; return error ResponseSchema
                    return ResponseSchema(
                        response_code=1,
                        response_reason=f"Terminal response code {response_code} received when submitting JobSpec",
                        trace_id=result.headers.get("x-trace-id"),
                    )

                # If 200 we are good, otherwise let's try again
                if response_code == 200:
                    logger.debug(f"JobSpec successfully submitted to http endpoint {self._submit_endpoint}")
                    # The REST interface returns a JobId, so we capture that here
                    x_trace_id = result.headers.get("x-trace-id")
                    return ResponseSchema(
                        response_code=0,
                        response_reason="OK",
                        response=result.text,
                        transaction_id=result.text,
                        trace_id=x_trace_id,
                    )

                # Retry the operation. A RuntimeError raised once retries are exhausted is
                # picked up by the generic ``except Exception`` handler below.
                retries = self.perform_retry_backoff(retries)
            except requests.RequestException as e:
                logger.error(f"Failed to submit job, retrying... Error: {e}")
                self._client = None  # Invalidate client to force reconnection
                if "Connection refused" in str(e):
                    logger.debug(
                        "Connection refused encountered during submission; sleeping for 10 seconds before retrying."
                    )
                    time.sleep(10)
                try:
                    retries = self.perform_retry_backoff(retries)
                except RuntimeError as rte:
                    # Max retries reached
                    return ResponseSchema(response_code=1, response_reason=str(rte), response=str(e))
            except Exception as e:
                # Handle non-http specific exceptions
                logger.error(f"Unexpected error during submission of JobSpec to {url}: {e}")
                return ResponseSchema(
                    response_code=1, response_reason=f"Unexpected error during JobSpec submission: {e}", response=None
                )

    def perform_retry_backoff(self, existing_retries) -> int:
        """
        Attempts to perform a backoff retry delay. This function accepts the
        current number of retries that have been attempted and compares
        that with the maximum number of retries allowed. If retries are disabled
        (``max_retries`` is 0) or the budget is used up, a RuntimeError is raised.

        Parameters
        ----------
        existing_retries : int
            The number of retries that have been attempted for this operation thus far

        Returns
        -------
        int
            The updated number of retry attempts that have been made for this operation

        Raises
        ------
        RuntimeError
            Raised if the maximum number of retry attempts has been reached.
        """
        # Exponential delay (1s, 2s, 4s, ...) capped at the configured maximum
        backoff_delay = min(2**existing_retries, self._max_backoff)
        logger.debug(
            f"Retry #: {existing_retries} of max_retries: {self.max_retries} | "
            f"current backoff_delay: {backoff_delay}s of max_backoff: {self._max_backoff}s"
        )

        if self.max_retries > 0 and existing_retries < self.max_retries:
            logger.error(f"Operation failed, retrying in {backoff_delay}s...")
            time.sleep(backoff_delay)
            return existing_retries + 1
        else:
            raise RuntimeError(f"Max retry attempts of {self.max_retries} reached")
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
-
# All rights reserved.
|
|
3
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
|
|
5
|
-
import logging
|
|
6
|
-
import re
|
|
7
|
-
|
|
8
|
-
logger = logging.getLogger(__name__)
|
|
9
|
-
|
|
10
|
-
# NOTE(review): the names suggest default generation settings (token limit, sampling
# temperature, top-p) for requests to a Deplot model; no usage is visible in this
# module, so confirm against the callers that import these constants.
DEPLOT_MAX_TOKENS = 128
DEPLOT_TEMPERATURE = 1.0
DEPLOT_TOP_P = 1.0
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def remove_url_endpoints(url) -> str:
    """Strip an API path from a URL, keeping everything before the first ``/v1``.

    Some configurations provide the full endpoint in the URL
    (e.g. ``http://deplot:8000/v1/chat/completions``). To reach the health
    endpoint only the leading ``scheme://hostname:port`` part is wanted, so
    everything from the first ``/v1`` onwards is dropped.

    Args:
        url str: Incoming URL

    Returns:
        str: The text preceding the first ``/v1``; the URL unchanged when it
        contains no ``/v1``.
    """
    # Nothing to trim: hand the value back untouched
    if "/v1" not in url:
        return url

    prefix, _separator, _remainder = url.partition("/v1")
    return prefix
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def generate_url(url) -> str:
    """Ensure a URL carries an HTTP scheme.

    A value that already begins with ``http://`` or ``https://`` (lowercase)
    is returned exactly as given. Anything else is assumed to be a plain
    endpoint and gets ``http://`` prepended.

    Args:
        url str: Endpoint where the Rest service is running

    Returns:
        str: The URL, guaranteed to start with ``http://`` or ``https://``
    """
    already_has_scheme = re.match(r"^https?://", url) is not None
    # Default to plain `http://` when the caller supplied no scheme
    return url if already_has_scheme else f"http://{url}"
|
|
@@ -1,152 +0,0 @@
|
|
|
1
|
-
nv_ingest_api/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
2
|
-
nv_ingest_api/interface/__init__.py,sha256=ltWlfmtCewHSRK4B7DF__QvlSUPuliz58JEcEIeIgI0,10134
|
|
3
|
-
nv_ingest_api/interface/extract.py,sha256=GyBfXKKTGwSb-y0k0nMiTf4HcCT2E-lxLY4aMYAPeOI,38815
|
|
4
|
-
nv_ingest_api/interface/mutate.py,sha256=eZkd3sbHEJQiEPJyMbhewlPxQNMnL_Xur15icclnb-U,5934
|
|
5
|
-
nv_ingest_api/interface/store.py,sha256=aR3Cf19lq9Yo9AHlAy1VVcrOP2dgyN01yYhwxyTprkQ,8207
|
|
6
|
-
nv_ingest_api/interface/transform.py,sha256=g6YnFR7TpEU0xNtzCvv6kqnFbuCwQ6vRMjjBxz3G4n4,15815
|
|
7
|
-
nv_ingest_api/interface/utility.py,sha256=oXHV2Miz2BKviQg5vOVfiGSvPs2fKJsPDmnxe3fJL9c,7857
|
|
8
|
-
nv_ingest_api/internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
nv_ingest_api/internal/enums/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
10
|
-
nv_ingest_api/internal/enums/common.py,sha256=HSj7qqNr6KXu_FIyK_Wvel24R-r8lV7dLA173z5XFBc,12321
|
|
11
|
-
nv_ingest_api/internal/extract/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
|
|
12
|
-
nv_ingest_api/internal/extract/audio/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
13
|
-
nv_ingest_api/internal/extract/audio/audio_extraction.py,sha256=L8cK7xB6QTaSx8gsrdyaYHYh0HpW6lycGfduCk7XSMg,5364
|
|
14
|
-
nv_ingest_api/internal/extract/docx/__init__.py,sha256=HIHfzSig66GT0Uk8qsGBm_f13fKYcPtItBicRUWOOVA,183
|
|
15
|
-
nv_ingest_api/internal/extract/docx/docx_extractor.py,sha256=lzZPSa-oHBmLk7ynop5aOLM2rVbYAbzSkPqVlAuT8RE,8319
|
|
16
|
-
nv_ingest_api/internal/extract/docx/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
-
nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
|
|
18
|
-
nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py,sha256=1wkciAxu8lz9WuPuoleJFy2s09ieSzXl1S71F9r0BWA,4385
|
|
19
|
-
nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py,sha256=CM2yV8lfEw1F1ORAjupD4gyIKX0PDDJrL3nsZ5Mnrgg,31539
|
|
20
|
-
nv_ingest_api/internal/extract/image/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
21
|
-
nv_ingest_api/internal/extract/image/chart_extractor.py,sha256=Jy_fNmDbZcdni55Fq7vT6NdbYnCyGoyw0J7QjpK-KPc,13315
|
|
22
|
-
nv_ingest_api/internal/extract/image/image_extractor.py,sha256=ocLvlVMzO9CQvduxbjupOeKxnt2aq1_CzJCqcdD-loo,8783
|
|
23
|
-
nv_ingest_api/internal/extract/image/infographic_extractor.py,sha256=k4Z6JwsoNKsyfmpaQkN_dxJpAv9-RVsRL1BfSWUtXTM,8908
|
|
24
|
-
nv_ingest_api/internal/extract/image/table_extractor.py,sha256=80FQef4Dsn6__MNIRCQzFf32s4wUyTOzBFgmA84JZJk,13133
|
|
25
|
-
nv_ingest_api/internal/extract/image/image_helpers/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
26
|
-
nv_ingest_api/internal/extract/image/image_helpers/common.py,sha256=NU8TEU9p2aIL_KppyhtTgRUPqD4MsanxATG19rKhGjw,15032
|
|
27
|
-
nv_ingest_api/internal/extract/pdf/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
28
|
-
nv_ingest_api/internal/extract/pdf/pdf_extractor.py,sha256=CxtWaD6mql9MEqSdk2CfSQ9T-Bn87beBkCOuGGjxGt8,2934
|
|
29
|
-
nv_ingest_api/internal/extract/pdf/engines/__init__.py,sha256=u4GnAZmDKRl0RwYGIRiozIRw70Kybw3A72-lcKFeoTI,582
|
|
30
|
-
nv_ingest_api/internal/extract/pdf/engines/adobe.py,sha256=VT0dEqkU-y2uGkaCqxtKYov_Q8R1028UQVBchgMLca4,17466
|
|
31
|
-
nv_ingest_api/internal/extract/pdf/engines/llama.py,sha256=PpKTqS8jGHBV6mKLGZWwjpfT8ga6Fy8ffrvL-gPAf2c,8182
|
|
32
|
-
nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py,sha256=F8hZdqYRr0CTNeIRJIG6H__CCh_3GWQ4_ySCM0WPLPU,22913
|
|
33
|
-
nv_ingest_api/internal/extract/pdf/engines/pdfium.py,sha256=jUcquCWbyQPNCHZLaV-XnVqUFsajX4YxVFCiWWwD4QQ,22367
|
|
34
|
-
nv_ingest_api/internal/extract/pdf/engines/tika.py,sha256=6GyR2l6EsgNZl9jnYDXLeKNK9Fj2Mw9y2UWDq-eSkOc,3169
|
|
35
|
-
nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py,sha256=jrv2B4VZAH4PevAQrFz965qz8UyXq3rViiOTbGLejec,14908
|
|
36
|
-
nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py,sha256=Jk3wrQ2CZs167juvEZ-uV6qXWQjR08hhIu8otk2MWj4,4931
|
|
37
|
-
nv_ingest_api/internal/extract/pptx/__init__.py,sha256=HIHfzSig66GT0Uk8qsGBm_f13fKYcPtItBicRUWOOVA,183
|
|
38
|
-
nv_ingest_api/internal/extract/pptx/pptx_extractor.py,sha256=15gU7NtTmTwr1ml679gABQABXI463ZKoqPOh31EK98s,7867
|
|
39
|
-
nv_ingest_api/internal/extract/pptx/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
|
-
nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py,sha256=tmUXw4H35o6dMcsS73Q6L_zd-qDqwCshTGfCv_V610c,28435
|
|
41
|
-
nv_ingest_api/internal/mutate/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
42
|
-
nv_ingest_api/internal/mutate/deduplicate.py,sha256=hmvTTGevpCtlkM_wVZSoc8-Exr6rUJwqLjoEnbPcPzY,3849
|
|
43
|
-
nv_ingest_api/internal/mutate/filter.py,sha256=H-hOTBVP-zLpvQr-FoGIJKxkhtj4l_sZ9V2Fgu3rTEM,5183
|
|
44
|
-
nv_ingest_api/internal/primitives/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
|
-
nv_ingest_api/internal/primitives/control_message_task.py,sha256=nWVB3QsP6p8BKwHThd-SNv_zwJAEA1mKCRharuju1mc,439
|
|
46
|
-
nv_ingest_api/internal/primitives/ingest_control_message.py,sha256=rvipBiiUaHuRhupFCFDCG8rv0PylSJibCiJ7rDeb98A,8514
|
|
47
|
-
nv_ingest_api/internal/primitives/nim/__init__.py,sha256=i_i_fBR2EcRCh2Y19DF6GM3s_Q0VPgo_thPnhEIJUyg,266
|
|
48
|
-
nv_ingest_api/internal/primitives/nim/default_values.py,sha256=W92XjfyeC6uuVxut6J7p00x1kpNsnXIDb97gSVytZJk,380
|
|
49
|
-
nv_ingest_api/internal/primitives/nim/nim_client.py,sha256=7C_t3BnYz_hL2H8RmvOShLCKlfYmwIREC6vnOnzOHWA,14483
|
|
50
|
-
nv_ingest_api/internal/primitives/nim/nim_model_interface.py,sha256=wMEgoi79YQn_4338MVemkeZgM1J-vnz0aZWpvqDhib4,2392
|
|
51
|
-
nv_ingest_api/internal/primitives/nim/model_interface/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
52
|
-
nv_ingest_api/internal/primitives/nim/model_interface/cached.py,sha256=b1HX-PY1ExW5V6pXC1ZiHdobeG_BmbPr3rBbVJef13s,11003
|
|
53
|
-
nv_ingest_api/internal/primitives/nim/model_interface/decorators.py,sha256=qwubkHs4WjnexM6rI0wkjWCsrVNEbA4Wjk2oKL9OYCU,1499
|
|
54
|
-
nv_ingest_api/internal/primitives/nim/model_interface/deplot.py,sha256=TvKdk6PTuI1WNhRmNNrvygaI_DIutkJkDL-XdtLZQac,10787
|
|
55
|
-
nv_ingest_api/internal/primitives/nim/model_interface/helpers.py,sha256=x35a9AyTYxpESQflLo_YnhVOKblQKVen6vGGFaXmNiE,9927
|
|
56
|
-
nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py,sha256=MFWPqMTXs_MZG3ripRR21o7f_mVeoE46Q10yvJ8KNr0,7023
|
|
57
|
-
nv_ingest_api/internal/primitives/nim/model_interface/paddle.py,sha256=rSUPwl5XOrqneoS6aKhatVjrNBg_LhP3nwUWS_aTwz0,17950
|
|
58
|
-
nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py,sha256=OYg4AGki_wm--Np9VlSm0eZC-r54GbDOISbe9v0B9fw,12967
|
|
59
|
-
nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py,sha256=oai0-moKDJOnOMfTaGQf-vo6qMRD6pbcf7_XRIt-oJ8,4934
|
|
60
|
-
nv_ingest_api/internal/primitives/nim/model_interface/vlm.py,sha256=qJ382PU1ZrIM-SR3cqIhtY_W2rmHec2HIa2aUB2SvaU,6031
|
|
61
|
-
nv_ingest_api/internal/primitives/nim/model_interface/yolox.py,sha256=exN0pKTBXd3pb5kKP96jinTYisgz1Y7EyWmWUuDNnCY,49312
|
|
62
|
-
nv_ingest_api/internal/primitives/tracing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
63
|
-
nv_ingest_api/internal/primitives/tracing/latency.py,sha256=5kVTeYRbRdTlT_aI4MeS20N_S7mqCcLqZR6YHtxhXkY,2215
|
|
64
|
-
nv_ingest_api/internal/primitives/tracing/logging.py,sha256=SSzIgS7afLH-e1C7VagYDmkkA6rTXmQ-bmtLjoEguhg,3851
|
|
65
|
-
nv_ingest_api/internal/primitives/tracing/tagging.py,sha256=RC-sF6-w8YBb74nZ2JjEnkCBAxBIamhIHdn1ksxDtRI,7577
|
|
66
|
-
nv_ingest_api/internal/schemas/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
67
|
-
nv_ingest_api/internal/schemas/extract/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
68
|
-
nv_ingest_api/internal/schemas/extract/extract_audio_schema.py,sha256=VVppZgV1lnyJCTfADexzoj3V0lOSq3t6Dw_6VhIxZ7k,3771
|
|
69
|
-
nv_ingest_api/internal/schemas/extract/extract_chart_schema.py,sha256=mNsv628oslNieU6KPUHw_Iwr4WohtK2dIHoVo2HnaEs,4302
|
|
70
|
-
nv_ingest_api/internal/schemas/extract/extract_docx_schema.py,sha256=M2N7WjMNvSemHcJHWeNUD_kFG0wC5VE2W3K6SVrJqvA,3761
|
|
71
|
-
nv_ingest_api/internal/schemas/extract/extract_image_schema.py,sha256=GC4xV8Z9TPLOuxlEtf2fbklSSp8ETGMrDpZgMQ02UwA,3766
|
|
72
|
-
nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py,sha256=_ptTrxN74tpasJ0aQZgaXEUYFe298PJGbGNk6gyeM94,3992
|
|
73
|
-
nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py,sha256=ZBCppSNmnr4jrPl2-R_j0RBw2L4ej_r0hVdFn02AG18,6569
|
|
74
|
-
nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py,sha256=5dT0kv-Mmpe5KW-BZc1JOW3rUlgzVZI0rpB79NWytmw,3761
|
|
75
|
-
nv_ingest_api/internal/schemas/extract/extract_table_schema.py,sha256=SXBYDU3V97-pPOLfhFmXQveP_awARXP7k1aGcMMEJtU,3951
|
|
76
|
-
nv_ingest_api/internal/schemas/message_brokers/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
|
|
77
|
-
nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py,sha256=nbnNzCQCCduoFw4k8XPfkpn3jyyMRpDLROTwEosaSG8,766
|
|
78
|
-
nv_ingest_api/internal/schemas/message_brokers/request_schema.py,sha256=LZX_wXDxTamVFqTQs2Yd8uvWyPE5mddHAWSU4PtfEIQ,966
|
|
79
|
-
nv_ingest_api/internal/schemas/message_brokers/response_schema.py,sha256=4b275HlzBSzpmuE2wdoeaGKPCdKki3wuWldtRIfrj8w,727
|
|
80
|
-
nv_ingest_api/internal/schemas/meta/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
81
|
-
nv_ingest_api/internal/schemas/meta/base_model_noext.py,sha256=8hXU1uuiqZ6t8EsoZ8vlC5EFf2zSZrKEX133FcfZMwI,316
|
|
82
|
-
nv_ingest_api/internal/schemas/meta/ingest_job_schema.py,sha256=4Ylcz5CDJXYUKd79-CnyrG7mI463jLd4Uachy7uTRVE,7735
|
|
83
|
-
nv_ingest_api/internal/schemas/meta/metadata_schema.py,sha256=_FAE-yeb01hxq05SXrV3NLM4DPUPSfnIbH6ZMliWsEg,6625
|
|
84
|
-
nv_ingest_api/internal/schemas/mutate/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
85
|
-
nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py,sha256=k1JOdlPPpsipc0XhHf-9YxJ_-W0HvpVE1ZhYmr7fzj0,395
|
|
86
|
-
nv_ingest_api/internal/schemas/store/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
87
|
-
nv_ingest_api/internal/schemas/store/store_embedding_schema.py,sha256=tdKeiraim9CDL9htgp4oUSCoPMoO5PrHBnlXqDyCpMw,956
|
|
88
|
-
nv_ingest_api/internal/schemas/store/store_image_schema.py,sha256=p2LGij9i6sG6RYmsfdiQOiWIc2j-POjxYrNuMrp3ELU,1010
|
|
89
|
-
nv_ingest_api/internal/schemas/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
90
|
-
nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py,sha256=ORXAowdjxBUyfkw95eg2F82DRFqEsuV9PwNKVBulcmY,568
|
|
91
|
-
nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py,sha256=31ThI5fr0yyENeJeE1xMAA-pxk1QVJLwM842zMate_k,429
|
|
92
|
-
nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py,sha256=vlTjAj1T78QkQXYkC83vZQKTW04x7PeoukEzmkam7sY,732
|
|
93
|
-
nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py,sha256=iM1sUklcZVA6fdeEWRsMqV_ls-E4UcUsGwewv0JJRi4,759
|
|
94
|
-
nv_ingest_api/internal/store/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
95
|
-
nv_ingest_api/internal/store/embed_text_upload.py,sha256=maxb4FPsBvWgvlrjAPEBlRZEFdJX5NxPG-p8kUbzV7I,9898
|
|
96
|
-
nv_ingest_api/internal/store/image_upload.py,sha256=J5EHNng7Z5I6M4f3UcbniKQB29Scr3Qe05wsBpaVXds,9653
|
|
97
|
-
nv_ingest_api/internal/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
98
|
-
nv_ingest_api/internal/transform/caption_image.py,sha256=0ILCG2F8ESqKtZiPUM-6F1BHUflFZ76Dzi2GNzkE-lU,8517
|
|
99
|
-
nv_ingest_api/internal/transform/embed_text.py,sha256=MACFgVHUxK3aVlEmymF7F4pT_aKoCcOKxrmmHalk1f0,15622
|
|
100
|
-
nv_ingest_api/internal/transform/split_text.py,sha256=y6NYRkCEVpVsDu-AqrKx2D6JPp1vwxclw9obNZNJIIs,6561
|
|
101
|
-
nv_ingest_api/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
102
|
-
nv_ingest_api/util/control_message/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
103
|
-
nv_ingest_api/util/control_message/validators.py,sha256=KvvbyheJ5rbzvJbH9JKpMR9VfoI0b0uM6eTAZte1p44,1315
|
|
104
|
-
nv_ingest_api/util/converters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
105
|
-
nv_ingest_api/util/converters/bytetools.py,sha256=xt57nCLcBgDOQ5S3q4HZ7L6dYcNFUzYuuQ_-e6zR-wo,1418
|
|
106
|
-
nv_ingest_api/util/converters/containers.py,sha256=RV1ooujhq6dVujAzC0MIdZvpOZyQqUeGyQONSZLT2RA,2188
|
|
107
|
-
nv_ingest_api/util/converters/datetools.py,sha256=9tskk4BkdLOMLI9ejXvRmri-otE1_Ast3oyX3HQoJZc,2579
|
|
108
|
-
nv_ingest_api/util/converters/dftools.py,sha256=FjHjazIeiUd1LdFwWuummJmraqZe1a90YrWzSjZKzB4,3284
|
|
109
|
-
nv_ingest_api/util/converters/formats.py,sha256=L11FtormO2SeHSebbwsGE_uuCv6Jk0D3VvVW2avU0vI,2258
|
|
110
|
-
nv_ingest_api/util/converters/type_mappings.py,sha256=VFVK5IXfnXJjG4ijDTzKSEZQvJ7xK6iO8snsLgG2vv8,1108
|
|
111
|
-
nv_ingest_api/util/detectors/__init__.py,sha256=HIHfzSig66GT0Uk8qsGBm_f13fKYcPtItBicRUWOOVA,183
|
|
112
|
-
nv_ingest_api/util/detectors/language.py,sha256=TvzcESYY0bn0U4aLY6GjB4VaCWA6XrXxAGZbVzHTMuE,965
|
|
113
|
-
nv_ingest_api/util/exception_handlers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
114
|
-
nv_ingest_api/util/exception_handlers/converters.py,sha256=MYl7b-_V1g661EVDgaep9-nO3A6ka9uyV6pKoN0pEDA,2223
|
|
115
|
-
nv_ingest_api/util/exception_handlers/decorators.py,sha256=F1E_xIpNY5S9TvBEZcsbli88iI5vHrcT1PGmPu5KwcE,8865
|
|
116
|
-
nv_ingest_api/util/exception_handlers/detectors.py,sha256=Q1O-QOzsShPpNian2lawXVAOCstIE7nSytNw516hTg8,2288
|
|
117
|
-
nv_ingest_api/util/exception_handlers/pdf.py,sha256=FUC41QJKDCfiTv-1c1_8Isxwt1xMxDZw9BN8JLEJKBw,3654
|
|
118
|
-
nv_ingest_api/util/exception_handlers/schemas.py,sha256=NJngVNf9sk5Uz6CFFfkNO_LBAMt2QZUcMYGxX64oYRk,2179
|
|
119
|
-
nv_ingest_api/util/image_processing/__init__.py,sha256=Jiy8C1ZuSrNb_eBM1ZTV9IKFIsnjhZi6Ku3JJhVLimA,104
|
|
120
|
-
nv_ingest_api/util/image_processing/clustering.py,sha256=sUGlZI4cx1q8h4Pns1N9JVpdfSM2BOH8zRmn9QFCtzI,9236
|
|
121
|
-
nv_ingest_api/util/image_processing/processing.py,sha256=dHyoxoI2btKT04ODJK0ChB8MR6eCnZ0ZLpbEQowCb5A,6561
|
|
122
|
-
nv_ingest_api/util/image_processing/table_and_chart.py,sha256=bxOu9PZYkG_WFCDGw_JLaO60S2pDSN8EOWK3xkIwr2A,14376
|
|
123
|
-
nv_ingest_api/util/image_processing/transforms.py,sha256=Kz9hrizV314Hy7cRCYK9ZmhmBbVUOZ_z0HEpzZYcslQ,14081
|
|
124
|
-
nv_ingest_api/util/logging/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
125
|
-
nv_ingest_api/util/logging/configuration.py,sha256=GFO7Fofco00O3DbovXDNqInVKpFuvS0i_-WT-GvxKlE,814
|
|
126
|
-
nv_ingest_api/util/message_brokers/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
127
|
-
nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py,sha256=WaQ3CWIpIKWEivT5kL-bkmzcSQKLGFNFHdXHUJjqZFs,325
|
|
128
|
-
nv_ingest_api/util/message_brokers/simple_message_broker/broker.py,sha256=h9Q4q_alXGxCLNlJUZPan46q8fJ7B72sQy2eBfHdk6I,17265
|
|
129
|
-
nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py,sha256=3p-LRqG8qLnsfEhBNf73_DG22C08JKahTqUvPLS2Apg,2554
|
|
130
|
-
nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py,sha256=roNuqWjfNSoi-Dus-RDeF4aBIcSjUpyq8eUDnWrRRBg,15338
|
|
131
|
-
nv_ingest_api/util/metadata/__init__.py,sha256=HIHfzSig66GT0Uk8qsGBm_f13fKYcPtItBicRUWOOVA,183
|
|
132
|
-
nv_ingest_api/util/metadata/aggregators.py,sha256=Y5JSKuLhhk_ldpzT3eRIcVg7QM7cTNhfQZn4g5bcbq4,15884
|
|
133
|
-
nv_ingest_api/util/multi_processing/__init__.py,sha256=4fojP8Rp_5Hu1YAkqGylqTyEZ-HBVVEunn5Z9I99swA,242
|
|
134
|
-
nv_ingest_api/util/multi_processing/mp_pool_singleton.py,sha256=dTfP82DgGPaXEJH3jywTO8rNlLZUniD4FFzwv84_giE,7372
|
|
135
|
-
nv_ingest_api/util/nim/__init__.py,sha256=UqbiXFCqjWcjNvoduXd_0gOUOGBT8JvppiYHOmMyneA,1775
|
|
136
|
-
nv_ingest_api/util/pdf/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
|
|
137
|
-
nv_ingest_api/util/pdf/pdfium.py,sha256=puqw9lYloNJwjdx9X63TQ9u6vA6FRJqmmPiaquUm3HM,15767
|
|
138
|
-
nv_ingest_api/util/schema/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
139
|
-
nv_ingest_api/util/schema/schema_validator.py,sha256=H0yZ_i_HZaiBRUCGmTBfRB9-hURhVqyd10aS_ynM1_0,321
|
|
140
|
-
nv_ingest_api/util/service_clients/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
141
|
-
nv_ingest_api/util/service_clients/client_base.py,sha256=TUe67EtHKNCx7kLJYRF62AsOJyf3KZ24CFdo0ujRx0M,2035
|
|
142
|
-
nv_ingest_api/util/service_clients/kafka/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
|
|
143
|
-
nv_ingest_api/util/service_clients/redis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
144
|
-
nv_ingest_api/util/service_clients/redis/redis_client.py,sha256=cT4kC2ys7c12lrWPinDuPPgs9gwr3Cw75QqJXQCmtCc,12842
|
|
145
|
-
nv_ingest_api/util/service_clients/rest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
146
|
-
nv_ingest_api/util/service_clients/rest/rest_client.py,sha256=F9V0YxhRwRfaoyiGsybnmxXieR-1ZkiBmJl7QLcZrl0,14413
|
|
147
|
-
nv_ingest_api/util/string_processing/__init__.py,sha256=mkwHthyS-IILcLcL1tJYeF6mpqX3pxEw5aUzDGjTSeU,1411
|
|
148
|
-
nv_ingest_api-2025.4.17.dev20250417.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
149
|
-
nv_ingest_api-2025.4.17.dev20250417.dist-info/METADATA,sha256=-eK-AA8NgqIwYd1oDZjNaVZNXlVOAwtLRuogarWvnj0,13889
|
|
150
|
-
nv_ingest_api-2025.4.17.dev20250417.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
151
|
-
nv_ingest_api-2025.4.17.dev20250417.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
|
|
152
|
-
nv_ingest_api-2025.4.17.dev20250417.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|