nv-ingest-api 26.1.0rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

Files changed (177) hide show
  1. nv_ingest_api/__init__.py +3 -0
  2. nv_ingest_api/interface/__init__.py +218 -0
  3. nv_ingest_api/interface/extract.py +977 -0
  4. nv_ingest_api/interface/mutate.py +154 -0
  5. nv_ingest_api/interface/store.py +200 -0
  6. nv_ingest_api/interface/transform.py +382 -0
  7. nv_ingest_api/interface/utility.py +186 -0
  8. nv_ingest_api/internal/__init__.py +0 -0
  9. nv_ingest_api/internal/enums/__init__.py +3 -0
  10. nv_ingest_api/internal/enums/common.py +550 -0
  11. nv_ingest_api/internal/extract/__init__.py +3 -0
  12. nv_ingest_api/internal/extract/audio/__init__.py +3 -0
  13. nv_ingest_api/internal/extract/audio/audio_extraction.py +202 -0
  14. nv_ingest_api/internal/extract/docx/__init__.py +5 -0
  15. nv_ingest_api/internal/extract/docx/docx_extractor.py +232 -0
  16. nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
  17. nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +3 -0
  18. nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +127 -0
  19. nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +971 -0
  20. nv_ingest_api/internal/extract/html/__init__.py +3 -0
  21. nv_ingest_api/internal/extract/html/html_extractor.py +84 -0
  22. nv_ingest_api/internal/extract/image/__init__.py +3 -0
  23. nv_ingest_api/internal/extract/image/chart_extractor.py +375 -0
  24. nv_ingest_api/internal/extract/image/image_extractor.py +208 -0
  25. nv_ingest_api/internal/extract/image/image_helpers/__init__.py +3 -0
  26. nv_ingest_api/internal/extract/image/image_helpers/common.py +433 -0
  27. nv_ingest_api/internal/extract/image/infographic_extractor.py +290 -0
  28. nv_ingest_api/internal/extract/image/ocr_extractor.py +407 -0
  29. nv_ingest_api/internal/extract/image/table_extractor.py +391 -0
  30. nv_ingest_api/internal/extract/pdf/__init__.py +3 -0
  31. nv_ingest_api/internal/extract/pdf/engines/__init__.py +19 -0
  32. nv_ingest_api/internal/extract/pdf/engines/adobe.py +484 -0
  33. nv_ingest_api/internal/extract/pdf/engines/llama.py +246 -0
  34. nv_ingest_api/internal/extract/pdf/engines/nemotron_parse.py +598 -0
  35. nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +166 -0
  36. nv_ingest_api/internal/extract/pdf/engines/pdfium.py +652 -0
  37. nv_ingest_api/internal/extract/pdf/engines/tika.py +96 -0
  38. nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +426 -0
  39. nv_ingest_api/internal/extract/pdf/pdf_extractor.py +74 -0
  40. nv_ingest_api/internal/extract/pptx/__init__.py +5 -0
  41. nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
  42. nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +968 -0
  43. nv_ingest_api/internal/extract/pptx/pptx_extractor.py +210 -0
  44. nv_ingest_api/internal/meta/__init__.py +3 -0
  45. nv_ingest_api/internal/meta/udf.py +232 -0
  46. nv_ingest_api/internal/mutate/__init__.py +3 -0
  47. nv_ingest_api/internal/mutate/deduplicate.py +110 -0
  48. nv_ingest_api/internal/mutate/filter.py +133 -0
  49. nv_ingest_api/internal/primitives/__init__.py +0 -0
  50. nv_ingest_api/internal/primitives/control_message_task.py +16 -0
  51. nv_ingest_api/internal/primitives/ingest_control_message.py +307 -0
  52. nv_ingest_api/internal/primitives/nim/__init__.py +9 -0
  53. nv_ingest_api/internal/primitives/nim/default_values.py +14 -0
  54. nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +3 -0
  55. nv_ingest_api/internal/primitives/nim/model_interface/cached.py +274 -0
  56. nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +56 -0
  57. nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +270 -0
  58. nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +338 -0
  59. nv_ingest_api/internal/primitives/nim/model_interface/nemotron_parse.py +239 -0
  60. nv_ingest_api/internal/primitives/nim/model_interface/ocr.py +776 -0
  61. nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +367 -0
  62. nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +129 -0
  63. nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +177 -0
  64. nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +1681 -0
  65. nv_ingest_api/internal/primitives/nim/nim_client.py +801 -0
  66. nv_ingest_api/internal/primitives/nim/nim_model_interface.py +126 -0
  67. nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
  68. nv_ingest_api/internal/primitives/tracing/latency.py +69 -0
  69. nv_ingest_api/internal/primitives/tracing/logging.py +96 -0
  70. nv_ingest_api/internal/primitives/tracing/tagging.py +288 -0
  71. nv_ingest_api/internal/schemas/__init__.py +3 -0
  72. nv_ingest_api/internal/schemas/extract/__init__.py +3 -0
  73. nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +133 -0
  74. nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +144 -0
  75. nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +129 -0
  76. nv_ingest_api/internal/schemas/extract/extract_html_schema.py +34 -0
  77. nv_ingest_api/internal/schemas/extract/extract_image_schema.py +126 -0
  78. nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +137 -0
  79. nv_ingest_api/internal/schemas/extract/extract_ocr_schema.py +137 -0
  80. nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +220 -0
  81. nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +128 -0
  82. nv_ingest_api/internal/schemas/extract/extract_table_schema.py +137 -0
  83. nv_ingest_api/internal/schemas/message_brokers/__init__.py +3 -0
  84. nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +37 -0
  85. nv_ingest_api/internal/schemas/message_brokers/request_schema.py +34 -0
  86. nv_ingest_api/internal/schemas/message_brokers/response_schema.py +19 -0
  87. nv_ingest_api/internal/schemas/meta/__init__.py +3 -0
  88. nv_ingest_api/internal/schemas/meta/base_model_noext.py +11 -0
  89. nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +355 -0
  90. nv_ingest_api/internal/schemas/meta/metadata_schema.py +394 -0
  91. nv_ingest_api/internal/schemas/meta/udf.py +23 -0
  92. nv_ingest_api/internal/schemas/mixins.py +39 -0
  93. nv_ingest_api/internal/schemas/mutate/__init__.py +3 -0
  94. nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +16 -0
  95. nv_ingest_api/internal/schemas/store/__init__.py +3 -0
  96. nv_ingest_api/internal/schemas/store/store_embedding_schema.py +28 -0
  97. nv_ingest_api/internal/schemas/store/store_image_schema.py +45 -0
  98. nv_ingest_api/internal/schemas/transform/__init__.py +3 -0
  99. nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +36 -0
  100. nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +17 -0
  101. nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +48 -0
  102. nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +24 -0
  103. nv_ingest_api/internal/store/__init__.py +3 -0
  104. nv_ingest_api/internal/store/embed_text_upload.py +236 -0
  105. nv_ingest_api/internal/store/image_upload.py +251 -0
  106. nv_ingest_api/internal/transform/__init__.py +3 -0
  107. nv_ingest_api/internal/transform/caption_image.py +219 -0
  108. nv_ingest_api/internal/transform/embed_text.py +702 -0
  109. nv_ingest_api/internal/transform/split_text.py +182 -0
  110. nv_ingest_api/util/__init__.py +3 -0
  111. nv_ingest_api/util/control_message/__init__.py +0 -0
  112. nv_ingest_api/util/control_message/validators.py +47 -0
  113. nv_ingest_api/util/converters/__init__.py +0 -0
  114. nv_ingest_api/util/converters/bytetools.py +78 -0
  115. nv_ingest_api/util/converters/containers.py +65 -0
  116. nv_ingest_api/util/converters/datetools.py +90 -0
  117. nv_ingest_api/util/converters/dftools.py +127 -0
  118. nv_ingest_api/util/converters/formats.py +64 -0
  119. nv_ingest_api/util/converters/type_mappings.py +27 -0
  120. nv_ingest_api/util/dataloader/__init__.py +9 -0
  121. nv_ingest_api/util/dataloader/dataloader.py +409 -0
  122. nv_ingest_api/util/detectors/__init__.py +5 -0
  123. nv_ingest_api/util/detectors/language.py +38 -0
  124. nv_ingest_api/util/exception_handlers/__init__.py +0 -0
  125. nv_ingest_api/util/exception_handlers/converters.py +72 -0
  126. nv_ingest_api/util/exception_handlers/decorators.py +429 -0
  127. nv_ingest_api/util/exception_handlers/detectors.py +74 -0
  128. nv_ingest_api/util/exception_handlers/pdf.py +116 -0
  129. nv_ingest_api/util/exception_handlers/schemas.py +68 -0
  130. nv_ingest_api/util/image_processing/__init__.py +5 -0
  131. nv_ingest_api/util/image_processing/clustering.py +260 -0
  132. nv_ingest_api/util/image_processing/processing.py +177 -0
  133. nv_ingest_api/util/image_processing/table_and_chart.py +504 -0
  134. nv_ingest_api/util/image_processing/transforms.py +850 -0
  135. nv_ingest_api/util/imports/__init__.py +3 -0
  136. nv_ingest_api/util/imports/callable_signatures.py +108 -0
  137. nv_ingest_api/util/imports/dynamic_resolvers.py +158 -0
  138. nv_ingest_api/util/introspection/__init__.py +3 -0
  139. nv_ingest_api/util/introspection/class_inspect.py +145 -0
  140. nv_ingest_api/util/introspection/function_inspect.py +65 -0
  141. nv_ingest_api/util/logging/__init__.py +0 -0
  142. nv_ingest_api/util/logging/configuration.py +102 -0
  143. nv_ingest_api/util/logging/sanitize.py +84 -0
  144. nv_ingest_api/util/message_brokers/__init__.py +3 -0
  145. nv_ingest_api/util/message_brokers/qos_scheduler.py +283 -0
  146. nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +9 -0
  147. nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +465 -0
  148. nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +71 -0
  149. nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +455 -0
  150. nv_ingest_api/util/metadata/__init__.py +5 -0
  151. nv_ingest_api/util/metadata/aggregators.py +516 -0
  152. nv_ingest_api/util/multi_processing/__init__.py +8 -0
  153. nv_ingest_api/util/multi_processing/mp_pool_singleton.py +200 -0
  154. nv_ingest_api/util/nim/__init__.py +161 -0
  155. nv_ingest_api/util/pdf/__init__.py +3 -0
  156. nv_ingest_api/util/pdf/pdfium.py +428 -0
  157. nv_ingest_api/util/schema/__init__.py +3 -0
  158. nv_ingest_api/util/schema/schema_validator.py +10 -0
  159. nv_ingest_api/util/service_clients/__init__.py +3 -0
  160. nv_ingest_api/util/service_clients/client_base.py +86 -0
  161. nv_ingest_api/util/service_clients/kafka/__init__.py +3 -0
  162. nv_ingest_api/util/service_clients/redis/__init__.py +3 -0
  163. nv_ingest_api/util/service_clients/redis/redis_client.py +983 -0
  164. nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
  165. nv_ingest_api/util/service_clients/rest/rest_client.py +595 -0
  166. nv_ingest_api/util/string_processing/__init__.py +51 -0
  167. nv_ingest_api/util/string_processing/configuration.py +682 -0
  168. nv_ingest_api/util/string_processing/yaml.py +109 -0
  169. nv_ingest_api/util/system/__init__.py +0 -0
  170. nv_ingest_api/util/system/hardware_info.py +594 -0
  171. nv_ingest_api-26.1.0rc4.dist-info/METADATA +237 -0
  172. nv_ingest_api-26.1.0rc4.dist-info/RECORD +177 -0
  173. nv_ingest_api-26.1.0rc4.dist-info/WHEEL +5 -0
  174. nv_ingest_api-26.1.0rc4.dist-info/licenses/LICENSE +201 -0
  175. nv_ingest_api-26.1.0rc4.dist-info/top_level.txt +2 -0
  176. udfs/__init__.py +5 -0
  177. udfs/llm_summarizer_udf.py +259 -0
@@ -0,0 +1,455 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ # NOTE: This code is duplicated from the ingest service:
6
+ # src/nv_ingest/util/message_brokers/simple_message_broker/simple_client.py
7
+ # Eventually we should move all client wrappers for the message broker into a shared library that both the ingest
8
+ # service and the client can use.
9
+
10
+ import socket
11
+ import json
12
+ import time
13
+ import logging
14
+ from typing import Optional, Tuple, Union
15
+
16
+ from nv_ingest_api.internal.schemas.message_brokers.response_schema import ResponseSchema
17
+ from nv_ingest_api.util.service_clients.client_base import MessageBrokerClientBase
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class SimpleClient(MessageBrokerClientBase):
    """
    A client for interfacing with SimpleMessageBroker, creating a new socket connection per request
    to ensure thread safety and robustness. Respects timeouts for all operations.

    Every frame exchanged with the broker is an 8-byte big-endian length header followed by a
    UTF-8 encoded payload (see ``_send`` / ``_recv``). All commands built by this class are JSON.
    """

    # Upper bound for a single ``sock.recv`` call. Without it the (peer-controlled) length header
    # would dictate the size of one receive-buffer allocation.
    _RECV_CHUNK_SIZE = 1 << 20

    def __init__(
        self,
        host: str,
        port: int,
        db: int = 0,
        max_retries: int = 3,
        max_backoff: int = 32,
        connection_timeout: int = 300,
        max_pool_size: int = 128,
        use_ssl: bool = False,
        api_version: str = "v1",
    ):
        """
        Initialize the SimpleClient with configuration parameters.

        Parameters
        ----------
        host : str
            The hostname or IP address of the broker.
        port : int
            The port number of the broker.
        db : int, optional
            The database index (default: 0).
        max_retries : int, optional
            Maximum number of retries for operations (default: 3).
        max_backoff : int, optional
            Maximum backoff time in seconds for retries (default: 32).
        connection_timeout : int, optional
            Timeout in seconds used both for establishing a connection and for each
            socket send/receive (default: 300).
        max_pool_size : int, optional
            Maximum pool size for connections (default: 128).
        use_ssl : bool, optional
            Whether to use SSL for connections (default: False).
        api_version : str, optional
            Accepted for signature compatibility; it is not stored or used by this
            class (default: "v1").
        """

        self._host = host
        self._port = port
        self._db = db
        self._max_retries = max_retries
        self._max_backoff = max_backoff
        self._max_pool_size = max_pool_size
        self._connection_timeout = connection_timeout
        self._use_ssl = use_ssl

    def get_client(self):
        """
        Retrieve the current client instance.

        Returns
        -------
        SimpleClient
            The current client instance.
        """
        return self

    def submit_message(
        self,
        queue_name: str,
        message: str,
        timeout: Optional[Tuple[int, Union[float, None]]] = (100, None),
        for_nv_ingest: bool = False,
    ) -> ResponseSchema:
        """
        Submit a message to the specified queue.

        Parameters
        ----------
        queue_name : str
            The name of the queue.
        message : str
            The message to be submitted.
        timeout : tuple, optional
            A tuple whose first element is the timeout in seconds for the whole operation;
            the second element is unused. ``None`` (or a ``None`` first element) disables
            the timeout.
        for_nv_ingest : bool, optional
            Indicates whether the message is for NV ingest operations.

        Returns
        -------
        ResponseSchema
            The response from the broker.
        """
        return self._handle_push(queue_name, message, timeout, for_nv_ingest)

    def fetch_message(
        self, queue_name: str, timeout: Optional[Tuple[int, Union[float, None]]] = (1200, None)
    ) -> ResponseSchema:
        """
        Fetch a message from a specified queue.

        Parameters
        ----------
        queue_name : str
            The name of the queue.
        timeout : tuple, optional
            A tuple containing the timeout value and an unused second element.

        Returns
        -------
        ResponseSchema
            The response from the broker. When the timeout elapses without a message,
            the response has ``response_code=2`` and reason "Job not ready.".
        """
        return self._handle_pop(queue_name, timeout)

    def ping(self) -> ResponseSchema:
        """
        Ping the broker to check connectivity.

        Returns
        -------
        ResponseSchema
            The response indicating the success of the ping operation.
        """
        command = {"command": "PING"}
        return self._execute_simple_command(command)

    def size(self, queue_name: str) -> ResponseSchema:
        """
        Fetch the size of the specified queue.

        Parameters
        ----------
        queue_name : str
            The name of the queue.

        Returns
        -------
        ResponseSchema
            The response containing the queue size.
        """
        command = {"command": "SIZE", "queue_name": queue_name}
        return self._execute_simple_command(command)

    @staticmethod
    def _operation_timeout(timeout):
        """
        Extract the operation timeout from the public ``timeout`` argument.

        Parameters
        ----------
        timeout : tuple, list, float, int or None
            Either a sequence whose first element is the timeout in seconds, or the
            timeout itself.

        Returns
        -------
        float, int or None
            The timeout in seconds, or None when no timeout applies (``None`` input,
            empty sequence, or a ``None`` first element).
        """
        if isinstance(timeout, (tuple, list)):
            return timeout[0] if timeout else None
        return timeout

    def _handle_push(
        self, queue_name: str, message: str, timeout: Optional[Tuple[int, Union[float, None]]], for_nv_ingest: bool
    ) -> ResponseSchema:
        """
        Push a message to the queue with optional timeout.

        The push is a two-step exchange on a single connection: the broker answers the
        command with a ``transaction_id``, the client acknowledges it, and the broker then
        sends the final response. "Queue is full" / "Queue is not available" answers and
        connection errors are retried every 0.5 seconds until the timeout elapses.

        Parameters
        ----------
        queue_name : str
            The name of the queue.
        message : str
            The message to push.
        timeout : tuple, optional
            Tuple whose first element is the timeout in seconds (truncated to an int);
            ``None`` disables the timeout.
        for_nv_ingest : bool
            Indicates whether the message is for NV ingest operations.

        Returns
        -------
        ResponseSchema
            The response from the broker, or a ``response_code=1`` response describing a
            validation failure, a timeout, or an unexpected error.
        """

        if not queue_name or not isinstance(queue_name, str):
            return ResponseSchema(response_code=1, response_reason="Invalid queue name.")
        if not message or not isinstance(message, str):
            return ResponseSchema(response_code=1, response_reason="Invalid message.")

        command = {
            "command": "PUSH_FOR_NV_INGEST" if for_nv_ingest else "PUSH",
            "queue_name": queue_name,
            "message": message,
        }

        # Normalise before converting: the argument is Optional, so neither ``timeout``
        # nor its first element may be assumed to be a number.
        timeout_val = self._operation_timeout(timeout)
        if timeout_val is not None:
            timeout_val = int(timeout_val)
            command["timeout"] = timeout_val

        # Monotonic clock: elapsed time must not jump when the wall clock is adjusted.
        start_time = time.monotonic()
        while True:
            elapsed = time.monotonic() - start_time
            if timeout_val is not None and elapsed >= timeout_val:
                return ResponseSchema(response_code=1, response_reason="PUSH operation timed out.")

            try:
                with socket.create_connection((self._host, self._port), timeout=self._connection_timeout) as sock:
                    sock.settimeout(self._connection_timeout)
                    self._send(sock, json.dumps(command).encode("utf-8"))
                    # Receive initial response with transaction ID
                    response = json.loads(self._recv(sock))

                    if response.get("response_code") != 0:
                        if response.get("response_reason") in ("Queue is full", "Queue is not available"):
                            # Transient broker state: wait and retry on a fresh connection.
                            time.sleep(0.5)
                            continue
                        return ResponseSchema(**response)

                    if "transaction_id" not in response:
                        error_msg = "No transaction_id in response."
                        logger.error(error_msg)

                        return ResponseSchema(response_code=1, response_reason=error_msg)

                    transaction_id = response["transaction_id"]

                    # Send ACK
                    ack_data = json.dumps({"transaction_id": transaction_id, "ack": True}).encode("utf-8")
                    self._send(sock, ack_data)

                    # Receive final response
                    final_response = json.loads(self._recv(sock))

                    return ResponseSchema(**final_response)

            except OSError as e:
                # OSError covers ConnectionError, BrokenPipeError, socket.error and socket.timeout.
                logger.debug("Connection error during PUSH: %s", e)
            except json.JSONDecodeError:
                return ResponseSchema(response_code=1, response_reason="Invalid JSON response from server.")
            except Exception as e:
                return ResponseSchema(response_code=1, response_reason=str(e))

            time.sleep(0.5)  # Backoff delay before retry

    def _handle_pop(self, queue_name: str, timeout: Optional[Tuple[int, Union[float, None]]]) -> ResponseSchema:
        """
        Pop a message from the queue with optional timeout.

        A broker answer with ``response_code == 2`` and connection errors are retried with
        exponential backoff (starting at 1 second, capped at ``max_backoff``). The backoff
        sleep never extends past the remaining timeout.

        Parameters
        ----------
        queue_name : str
            The name of the queue.
        timeout : tuple, optional
            Tuple whose first element is the timeout in seconds; ``None`` disables the
            timeout.

        Returns
        -------
        ResponseSchema
            The response containing the popped message, ``response_code=2`` with reason
            "Job not ready." when the timeout elapses, or a ``response_code=1`` response
            describing a validation failure or an unexpected error.
        """

        if not queue_name or not isinstance(queue_name, str):
            return ResponseSchema(response_code=1, response_reason="Invalid queue name.")

        command = {"command": "POP", "queue_name": queue_name}

        timeout_val = self._operation_timeout(timeout)
        if timeout_val is not None:
            command["timeout"] = timeout_val

        # Monotonic clock: elapsed time must not jump when the wall clock is adjusted.
        start_time = time.monotonic()
        backoff_delay = 1  # Start with a 1-second backoff

        while True:
            elapsed = time.monotonic() - start_time
            if timeout_val is not None and elapsed >= timeout_val:
                return ResponseSchema(response_code=2, response_reason="Job not ready.")

            try:
                with socket.create_connection((self._host, self._port), timeout=self._connection_timeout) as sock:
                    sock.settimeout(self._connection_timeout)
                    self._send(sock, json.dumps(command).encode("utf-8"))
                    # Receive initial response with transaction ID and message
                    response = json.loads(self._recv(sock))

                    # The broker returns a response_code of 2 when the queue is empty or the
                    # job is not ready; that case falls through to backoff and retry.
                    response_code = response.get("response_code")
                    if response_code != 2:
                        if response_code != 0:
                            return ResponseSchema(**response)

                        # Success case: we received a message.
                        if "transaction_id" not in response:
                            return ResponseSchema(response_code=1, response_reason="No transaction_id in response.")

                        transaction_id = response["transaction_id"]
                        message = response.get("response")

                        # Send ACK
                        ack_data = json.dumps({"transaction_id": transaction_id, "ack": True}).encode("utf-8")
                        self._send(sock, ack_data)

                        # Receive final response
                        final_response = json.loads(self._recv(sock))

                        if final_response.get("response_code") == 0:
                            return ResponseSchema(response_code=0, response=message, transaction_id=transaction_id)
                        return ResponseSchema(**final_response)

            except OSError as e:
                # OSError covers ConnectionError, BrokenPipeError, socket.error and socket.timeout.
                logger.debug("Connection error during POP: %s, will retry after backoff.", e)
            except json.JSONDecodeError:
                return ResponseSchema(response_code=1, response_reason="Invalid JSON response from server.")
            except Exception as e:
                return ResponseSchema(response_code=1, response_reason=str(e))

            # Exponential backoff, never sleeping past the caller's deadline.
            sleep_for = backoff_delay
            if timeout_val is not None:
                remaining = timeout_val - (time.monotonic() - start_time)
                sleep_for = max(0.0, min(backoff_delay, remaining))
            time.sleep(sleep_for)
            backoff_delay = min(backoff_delay * 2, self._max_backoff)

    def _execute_simple_command(self, command: dict) -> ResponseSchema:
        """
        Execute a simple command on the broker and process the response.

        Parameters
        ----------
        command : dict or str
            The command to execute. A dict is serialised to JSON; a str is sent as-is.

        Returns
        -------
        ResponseSchema
            The response from the broker. Connection errors yield ``response_code=2``;
            an unsupported command type, invalid JSON from the broker, or any other
            error yields ``response_code=1``.
        """

        if isinstance(command, dict):
            data = json.dumps(command).encode("utf-8")
        elif isinstance(command, str):
            data = command.encode("utf-8")
        else:
            # Previously this fell through with ``data`` unbound and raised UnboundLocalError.
            return ResponseSchema(response_code=1, response_reason="Invalid command.")

        try:
            with socket.create_connection((self._host, self._port), timeout=self._connection_timeout) as sock:
                sock.settimeout(self._connection_timeout)
                self._send(sock, data)
                response = json.loads(self._recv(sock))
                return ResponseSchema(**response)
        except OSError as e:
            # OSError covers ConnectionError, BrokenPipeError, socket.error and socket.timeout.
            return ResponseSchema(response_code=2, response_reason=f"Connection error: {e}")
        except json.JSONDecodeError:
            return ResponseSchema(response_code=1, response_reason="Invalid JSON response from server.")
        except Exception as e:
            return ResponseSchema(response_code=1, response_reason=str(e))

    def _send(self, sock: socket.socket, data: bytes) -> None:
        """
        Send data over a socket connection with a length header.

        Parameters
        ----------
        sock : socket.socket
            The socket connection.
        data : bytes
            The data to send.

        Raises
        ------
        ValueError
            If ``data`` is empty.
        ConnectionError
            If sending data fails; the underlying socket error is attached as the cause.
        """

        total_length = len(data)
        if total_length == 0:
            raise ValueError("Cannot send an empty message.")

        try:
            # 8-byte big-endian length prefix, then the payload.
            sock.sendall(total_length.to_bytes(8, "big"))
            sock.sendall(data)
        except OSError as e:
            raise ConnectionError("Failed to send data.") from e

    def _recv(self, sock: socket.socket) -> str:
        """
        Receive data from a socket connection based on a length header.

        Parameters
        ----------
        sock : socket.socket
            The socket connection.

        Returns
        -------
        str
            The received data as a string.

        Raises
        ------
        ConnectionError
            If receiving data fails, the connection closes early, or the frame is empty.
            The more specific error is attached as the cause.
        """

        try:
            length_header = self._recv_exact(sock, 8)
            if not length_header:
                raise ConnectionError("Incomplete length header received.")
            total_length = int.from_bytes(length_header, "big")
            data_bytes = self._recv_exact(sock, total_length)
            # A zero-length frame is treated like a failed read: _send never emits one.
            if not data_bytes:
                raise ConnectionError("Incomplete message received.")
            return data_bytes.decode("utf-8")
        except OSError as e:
            # Keep the historical message for callers; the specific reason travels as __cause__.
            raise ConnectionError("Failed to receive data.") from e

    def _recv_exact(self, sock: socket.socket, num_bytes: int) -> Optional[bytes]:
        """
        Receive an exact number of bytes from a socket connection.

        Parameters
        ----------
        sock : socket.socket
            The socket connection.
        num_bytes : int
            The number of bytes to receive.

        Returns
        -------
        Optional[bytes]
            The received bytes, or None if the connection is closed, times out, or any
            other error occurs before ``num_bytes`` bytes have arrived.
        """

        data = bytearray()
        while len(data) < num_bytes:
            try:
                # Cap each read so a huge announced length cannot force one huge allocation.
                packet = sock.recv(min(num_bytes - len(data), self._RECV_CHUNK_SIZE))
                if not packet:
                    # Peer closed the connection before the full payload arrived.
                    return None
                data.extend(packet)
            except Exception as e:
                # Includes socket.timeout; callers translate None into ConnectionError.
                logger.debug("Socket receive failed after %d of %d bytes: %s", len(data), num_bytes, e)
                return None
        return bytes(data)
@@ -0,0 +1,5 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ # Copyright (c) 2024, NVIDIA CORPORATION.