nv-ingest-api 2025.3.27.dev20250327__py3-none-any.whl → 2025.3.28.dev20250328__py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.

Potentially problematic release: this version of nv-ingest-api has been flagged as possibly problematic.

Files changed (153)
  1. nv_ingest_api/__init__.py +0 -3
  2. nv_ingest_api/{internal/primitives → primitives}/control_message_task.py +0 -4
  3. nv_ingest_api/{internal/primitives → primitives}/ingest_control_message.py +2 -5
  4. {nv_ingest_api-2025.3.27.dev20250327.dist-info → nv_ingest_api-2025.3.28.dev20250328.dist-info}/METADATA +1 -1
  5. nv_ingest_api-2025.3.28.dev20250328.dist-info/RECORD +9 -0
  6. nv_ingest_api/interface/__init__.py +0 -215
  7. nv_ingest_api/interface/extract.py +0 -972
  8. nv_ingest_api/interface/mutate.py +0 -154
  9. nv_ingest_api/interface/store.py +0 -218
  10. nv_ingest_api/interface/transform.py +0 -382
  11. nv_ingest_api/interface/utility.py +0 -200
  12. nv_ingest_api/internal/enums/__init__.py +0 -3
  13. nv_ingest_api/internal/enums/common.py +0 -494
  14. nv_ingest_api/internal/extract/__init__.py +0 -3
  15. nv_ingest_api/internal/extract/audio/__init__.py +0 -3
  16. nv_ingest_api/internal/extract/audio/audio_extraction.py +0 -149
  17. nv_ingest_api/internal/extract/docx/__init__.py +0 -5
  18. nv_ingest_api/internal/extract/docx/docx_extractor.py +0 -205
  19. nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
  20. nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -3
  21. nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -122
  22. nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -895
  23. nv_ingest_api/internal/extract/image/__init__.py +0 -3
  24. nv_ingest_api/internal/extract/image/chart_extractor.py +0 -353
  25. nv_ingest_api/internal/extract/image/image_extractor.py +0 -204
  26. nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -3
  27. nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -403
  28. nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -253
  29. nv_ingest_api/internal/extract/image/table_extractor.py +0 -344
  30. nv_ingest_api/internal/extract/pdf/__init__.py +0 -3
  31. nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -19
  32. nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -484
  33. nv_ingest_api/internal/extract/pdf/engines/llama.py +0 -243
  34. nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +0 -597
  35. nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +0 -146
  36. nv_ingest_api/internal/extract/pdf/engines/pdfium.py +0 -603
  37. nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -96
  38. nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -426
  39. nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -74
  40. nv_ingest_api/internal/extract/pptx/__init__.py +0 -5
  41. nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
  42. nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +0 -799
  43. nv_ingest_api/internal/extract/pptx/pptx_extractor.py +0 -187
  44. nv_ingest_api/internal/mutate/__init__.py +0 -3
  45. nv_ingest_api/internal/mutate/deduplicate.py +0 -110
  46. nv_ingest_api/internal/mutate/filter.py +0 -133
  47. nv_ingest_api/internal/primitives/__init__.py +0 -0
  48. nv_ingest_api/internal/primitives/nim/__init__.py +0 -8
  49. nv_ingest_api/internal/primitives/nim/default_values.py +0 -15
  50. nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -3
  51. nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -274
  52. nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -56
  53. nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -270
  54. nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -272
  55. nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +0 -238
  56. nv_ingest_api/internal/primitives/nim/model_interface/paddle.py +0 -452
  57. nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +0 -367
  58. nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +0 -132
  59. nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -152
  60. nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +0 -1400
  61. nv_ingest_api/internal/primitives/nim/nim_client.py +0 -344
  62. nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -81
  63. nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
  64. nv_ingest_api/internal/primitives/tracing/latency.py +0 -69
  65. nv_ingest_api/internal/primitives/tracing/logging.py +0 -96
  66. nv_ingest_api/internal/primitives/tracing/tagging.py +0 -197
  67. nv_ingest_api/internal/schemas/__init__.py +0 -3
  68. nv_ingest_api/internal/schemas/extract/__init__.py +0 -3
  69. nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +0 -130
  70. nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +0 -135
  71. nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +0 -124
  72. nv_ingest_api/internal/schemas/extract/extract_image_schema.py +0 -124
  73. nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +0 -128
  74. nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +0 -218
  75. nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +0 -124
  76. nv_ingest_api/internal/schemas/extract/extract_table_schema.py +0 -129
  77. nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -3
  78. nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -23
  79. nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -34
  80. nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -19
  81. nv_ingest_api/internal/schemas/meta/__init__.py +0 -3
  82. nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -11
  83. nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +0 -237
  84. nv_ingest_api/internal/schemas/meta/metadata_schema.py +0 -221
  85. nv_ingest_api/internal/schemas/mutate/__init__.py +0 -3
  86. nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -16
  87. nv_ingest_api/internal/schemas/store/__init__.py +0 -3
  88. nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -28
  89. nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -30
  90. nv_ingest_api/internal/schemas/transform/__init__.py +0 -3
  91. nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +0 -15
  92. nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -17
  93. nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +0 -25
  94. nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +0 -22
  95. nv_ingest_api/internal/store/__init__.py +0 -3
  96. nv_ingest_api/internal/store/embed_text_upload.py +0 -236
  97. nv_ingest_api/internal/store/image_upload.py +0 -232
  98. nv_ingest_api/internal/transform/__init__.py +0 -3
  99. nv_ingest_api/internal/transform/caption_image.py +0 -205
  100. nv_ingest_api/internal/transform/embed_text.py +0 -496
  101. nv_ingest_api/internal/transform/split_text.py +0 -157
  102. nv_ingest_api/util/__init__.py +0 -0
  103. nv_ingest_api/util/control_message/__init__.py +0 -0
  104. nv_ingest_api/util/control_message/validators.py +0 -47
  105. nv_ingest_api/util/converters/__init__.py +0 -0
  106. nv_ingest_api/util/converters/bytetools.py +0 -78
  107. nv_ingest_api/util/converters/containers.py +0 -65
  108. nv_ingest_api/util/converters/datetools.py +0 -90
  109. nv_ingest_api/util/converters/dftools.py +0 -127
  110. nv_ingest_api/util/converters/formats.py +0 -64
  111. nv_ingest_api/util/converters/type_mappings.py +0 -27
  112. nv_ingest_api/util/detectors/__init__.py +0 -5
  113. nv_ingest_api/util/detectors/language.py +0 -38
  114. nv_ingest_api/util/exception_handlers/__init__.py +0 -0
  115. nv_ingest_api/util/exception_handlers/converters.py +0 -72
  116. nv_ingest_api/util/exception_handlers/decorators.py +0 -223
  117. nv_ingest_api/util/exception_handlers/detectors.py +0 -74
  118. nv_ingest_api/util/exception_handlers/pdf.py +0 -116
  119. nv_ingest_api/util/exception_handlers/schemas.py +0 -68
  120. nv_ingest_api/util/image_processing/__init__.py +0 -5
  121. nv_ingest_api/util/image_processing/clustering.py +0 -260
  122. nv_ingest_api/util/image_processing/processing.py +0 -179
  123. nv_ingest_api/util/image_processing/table_and_chart.py +0 -449
  124. nv_ingest_api/util/image_processing/transforms.py +0 -407
  125. nv_ingest_api/util/logging/__init__.py +0 -0
  126. nv_ingest_api/util/logging/configuration.py +0 -31
  127. nv_ingest_api/util/message_brokers/__init__.py +0 -3
  128. nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -9
  129. nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -465
  130. nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -71
  131. nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +0 -435
  132. nv_ingest_api/util/metadata/__init__.py +0 -5
  133. nv_ingest_api/util/metadata/aggregators.py +0 -469
  134. nv_ingest_api/util/multi_processing/__init__.py +0 -8
  135. nv_ingest_api/util/multi_processing/mp_pool_singleton.py +0 -194
  136. nv_ingest_api/util/nim/__init__.py +0 -56
  137. nv_ingest_api/util/pdf/__init__.py +0 -3
  138. nv_ingest_api/util/pdf/pdfium.py +0 -427
  139. nv_ingest_api/util/schema/__init__.py +0 -0
  140. nv_ingest_api/util/schema/schema_validator.py +0 -10
  141. nv_ingest_api/util/service_clients/__init__.py +0 -3
  142. nv_ingest_api/util/service_clients/client_base.py +0 -72
  143. nv_ingest_api/util/service_clients/kafka/__init__.py +0 -3
  144. nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
  145. nv_ingest_api/util/service_clients/redis/redis_client.py +0 -334
  146. nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
  147. nv_ingest_api/util/service_clients/rest/rest_client.py +0 -368
  148. nv_ingest_api/util/string_processing/__init__.py +0 -51
  149. nv_ingest_api-2025.3.27.dev20250327.dist-info/RECORD +0 -152
  150. /nv_ingest_api/{internal → primitives}/__init__.py +0 -0
  151. {nv_ingest_api-2025.3.27.dev20250327.dist-info → nv_ingest_api-2025.3.28.dev20250328.dist-info}/WHEEL +0 -0
  152. {nv_ingest_api-2025.3.27.dev20250327.dist-info → nv_ingest_api-2025.3.28.dev20250328.dist-info}/licenses/LICENSE +0 -0
  153. {nv_ingest_api-2025.3.27.dev20250327.dist-info → nv_ingest_api-2025.3.28.dev20250328.dist-info}/top_level.txt +0 -0
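Entries 2, 3, and 150 above show the control-message primitives moving from nv_ingest_api/internal/primitives to nv_ingest_api/primitives, while most other modules are dropped from the wheel (compare the old 152-line RECORD in entry 149 with the new 9-line RECORD in entry 5). Below is a minimal, hypothetical sketch of the import adjustment that move implies for downstream code; it assumes the module still exposes the IngestControlMessage class, which should be verified against the released 2025.3.28 wheel.

```python
# Hypothetical compatibility shim implied by the renames above; verify the class
# name and the new module path against the released 2025.3.28 wheel before use.
try:
    # New layout (2025.3.28.dev20250328): primitives promoted out of "internal".
    from nv_ingest_api.primitives.ingest_control_message import IngestControlMessage
except ImportError:
    # Old layout (2025.3.27.dev20250327).
    from nv_ingest_api.internal.primitives.ingest_control_message import IngestControlMessage
```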
nv_ingest_api/internal/primitives/nim/model_interface/paddle.py (deleted)
@@ -1,452 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
- # All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
-
- import json
- import logging
- from typing import Any, List, Tuple
- from typing import Dict
- from typing import Optional
-
- import numpy as np
-
- from nv_ingest_api.internal.primitives.nim import ModelInterface
- from nv_ingest_api.internal.primitives.nim.model_interface.helpers import preprocess_image_for_paddle
- from nv_ingest_api.util.image_processing.transforms import base64_to_numpy
-
- logger = logging.getLogger(__name__)
-
-
- class PaddleOCRModelInterface(ModelInterface):
-     """
-     An interface for handling inference with a PaddleOCR model, supporting both gRPC and HTTP protocols.
-     """
-
-     def name(self) -> str:
-         """
-         Get the name of the model interface.
-
-         Returns
-         -------
-         str
-             The name of the model interface.
-         """
-         return "PaddleOCR"
-
-     def prepare_data_for_inference(self, data: Dict[str, Any]) -> Dict[str, Any]:
-         """
-         Decode one or more base64-encoded images into NumPy arrays, storing them
-         alongside their dimensions in `data`.
-
-         Parameters
-         ----------
-         data : dict of str -> Any
-             The input data containing either:
-             - 'base64_image': a single base64-encoded image, or
-             - 'base64_images': a list of base64-encoded images.
-
-         Returns
-         -------
-         dict of str -> Any
-             The updated data dictionary with the following keys added:
-             - "image_arrays": List of decoded NumPy arrays of shape (H, W, C).
-             - "image_dims": List of (height, width) tuples for each decoded image.
-
-         Raises
-         ------
-         KeyError
-             If neither 'base64_image' nor 'base64_images' is found in `data`.
-         ValueError
-             If 'base64_images' is present but is not a list.
-         """
-         if "base64_images" in data:
-             base64_list = data["base64_images"]
-             if not isinstance(base64_list, list):
-                 raise ValueError("The 'base64_images' key must contain a list of base64-encoded strings.")
-
-             image_arrays: List[np.ndarray] = []
-             for b64 in base64_list:
-                 img = base64_to_numpy(b64)
-                 image_arrays.append(img)
-
-             data["image_arrays"] = image_arrays
-
-         elif "base64_image" in data:
-             # Single-image fallback
-             img = base64_to_numpy(data["base64_image"])
-             data["image_arrays"] = [img]
-
-         else:
-             raise KeyError("Input data must include 'base64_image' or 'base64_images'.")
-
-         return data
-
-     def format_input(self, data: Dict[str, Any], protocol: str, max_batch_size: int, **kwargs) -> Any:
-         """
-         Format input data for the specified protocol ("grpc" or "http"), supporting batched data.
-
-         Parameters
-         ----------
-         data : dict of str -> Any
-             The input data dictionary, expected to contain "image_arrays" (list of np.ndarray)
-             and "image_dims" (list of (height, width) tuples), as produced by prepare_data_for_inference.
-         protocol : str
-             The inference protocol, either "grpc" or "http".
-         max_batch_size : int
-             The maximum batch size for batching.
-
-         Returns
-         -------
-         tuple
-             A tuple (formatted_batches, formatted_batch_data) where:
-             - formatted_batches is a list of batches ready for inference.
-             - formatted_batch_data is a list of scratch-pad dictionaries corresponding to each batch,
-               containing the keys "image_arrays" and "image_dims" for later post-processing.
-
-         Raises
-         ------
-         KeyError
-             If either "image_arrays" or "image_dims" is not found in `data`.
-         ValueError
-             If an invalid protocol is specified.
-         """
-
-         images = data["image_arrays"]
-
-         dims: List[Dict[str, Any]] = []
-         data["image_dims"] = dims
-
-         # Helper function to split a list into chunks of size up to chunk_size.
-         def chunk_list(lst, chunk_size):
-             return [lst[i : i + chunk_size] for i in range(0, len(lst), chunk_size)]
-
-         if "image_arrays" not in data or "image_dims" not in data:
-             raise KeyError("Expected 'image_arrays' and 'image_dims' in data. Call prepare_data_for_inference first.")
-
-         images = data["image_arrays"]
-         dims = data["image_dims"]
-
-         if protocol == "grpc":
-             logger.debug("Formatting input for gRPC PaddleOCR model (batched).")
-             processed: List[np.ndarray] = []
-             for img in images:
-                 arr, _dims = preprocess_image_for_paddle(img)
-                 dims.append(_dims)
-                 arr = arr.astype(np.float32)
-                 arr = np.expand_dims(arr, axis=0)  # => shape (1, H, W, C)
-                 processed.append(arr)
-
-             batches = []
-             batch_data_list = []
-             for proc_chunk, orig_chunk, dims_chunk in zip(
-                 chunk_list(processed, max_batch_size),
-                 chunk_list(images, max_batch_size),
-                 chunk_list(dims, max_batch_size),
-             ):
-                 batched_input = np.concatenate(proc_chunk, axis=0)
-                 batches.append(batched_input)
-                 batch_data_list.append({"image_arrays": orig_chunk, "image_dims": dims_chunk})
-             return batches, batch_data_list
-
-         elif protocol == "http":
-             logger.debug("Formatting input for HTTP PaddleOCR model (batched).")
-             if "base64_images" in data:
-                 base64_list = data["base64_images"]
-             else:
-                 base64_list = [data["base64_image"]]
-
-             input_list: List[Dict[str, Any]] = []
-             for b64, img in zip(base64_list, images):
-                 image_url = f"data:image/png;base64,{b64}"
-                 image_obj = {"type": "image_url", "url": image_url}
-                 input_list.append(image_obj)
-                 _dims = {"new_width": img.shape[0], "new_height": img.shape[1]}
-                 dims.append(_dims)
-
-             batches = []
-             batch_data_list = []
-             for input_chunk, orig_chunk, dims_chunk in zip(
-                 chunk_list(input_list, max_batch_size),
-                 chunk_list(images, max_batch_size),
-                 chunk_list(dims, max_batch_size),
-             ):
-                 payload = {"input": input_chunk}
-                 batches.append(payload)
-                 batch_data_list.append({"image_arrays": orig_chunk, "image_dims": dims_chunk})
-
-             return batches, batch_data_list
-
-         else:
-             raise ValueError("Invalid protocol specified. Must be 'grpc' or 'http'.")
-
-     def parse_output(self, response: Any, protocol: str, data: Optional[Dict[str, Any]] = None, **kwargs: Any) -> Any:
-         """
-         Parse the model's inference response for the given protocol. The parsing
-         may handle batched outputs for multiple images.
-
-         Parameters
-         ----------
-         response : Any
-             The raw response from the PaddleOCR model.
-         protocol : str
-             The protocol used for inference, "grpc" or "http".
-         data : dict of str -> Any, optional
-             Additional data dictionary that may include "image_dims" for bounding box scaling.
-         **kwargs : Any
-             Additional keyword arguments, such as custom `table_content_format`.
-
-         Returns
-         -------
-         Any
-             The parsed output, typically a list of (content, table_content_format) tuples.
-
-         Raises
-         ------
-         ValueError
-             If an invalid protocol is specified.
-         """
-         # Retrieve image dimensions if available
-         dims: Optional[List[Tuple[int, int]]] = data.get("image_dims") if data else None
-
-         if protocol == "grpc":
-             logger.debug("Parsing output from gRPC PaddleOCR model (batched).")
-             return self._extract_content_from_paddle_grpc_response(response, dims)
-
-         elif protocol == "http":
-             logger.debug("Parsing output from HTTP PaddleOCR model (batched).")
-             return self._extract_content_from_paddle_http_response(response)
-
-         else:
-             raise ValueError("Invalid protocol specified. Must be 'grpc' or 'http'.")
-
-     def process_inference_results(self, output: Any, **kwargs: Any) -> Any:
-         """
-         Process inference results for the PaddleOCR model.
-
-         Parameters
-         ----------
-         output : Any
-             The raw output parsed from the PaddleOCR model.
-         **kwargs : Any
-             Additional keyword arguments for customization.
-
-         Returns
-         -------
-         Any
-             The post-processed inference results. By default, this simply returns the output
-             as the table content (or content list).
-         """
-         return output
-
-     def _prepare_paddle_payload(self, base64_img: str) -> Dict[str, Any]:
-         """
-         DEPRECATED by batch logic in format_input. Kept here if you need single-image direct calls.
-
-         Parameters
-         ----------
-         base64_img : str
-             A single base64-encoded image string.
-
-         Returns
-         -------
-         dict of str -> Any
-             The payload in either legacy or new format for PaddleOCR's HTTP endpoint.
-         """
-         image_url = f"data:image/png;base64,{base64_img}"
-
-         image = {"type": "image_url", "url": image_url}
-         payload = {"input": [image]}
-
-         return payload
-
-     def _extract_content_from_paddle_http_response(
-         self,
-         json_response: Dict[str, Any],
-     ) -> List[Tuple[str, str]]:
-         """
-         Extract content from the JSON response of a PaddleOCR HTTP API request.
-
-         Parameters
-         ----------
-         json_response : dict of str -> Any
-             The JSON response returned by the PaddleOCR endpoint.
-         table_content_format : str or None
-             The specified format for table content (e.g., 'simple' or 'pseudo_markdown').
-
-         Returns
-         -------
-         list of (str, str)
-             A list of (content, table_content_format) tuples, one for each image result.
-
-         Raises
-         ------
-         RuntimeError
-             If the response format is missing or invalid.
-         ValueError
-             If the `table_content_format` is unrecognized.
-         """
-         if "data" not in json_response or not json_response["data"]:
-             raise RuntimeError("Unexpected response format: 'data' key is missing or empty.")
-
-         results: List[str] = []
-         for item_idx, item in enumerate(json_response["data"]):
-             text_detections = item.get("text_detections", [])
-             text_predictions = []
-             bounding_boxes = []
-             for td in text_detections:
-                 text_predictions.append(td["text_prediction"]["text"])
-                 bounding_boxes.append([[pt["x"], pt["y"]] for pt in td["bounding_box"]["points"]])
-
-             results.append([bounding_boxes, text_predictions])
-
-         return results
-
-     def _extract_content_from_paddle_grpc_response(
-         self,
-         response: np.ndarray,
-         dimensions: List[Dict[str, Any]],
-     ) -> List[Tuple[str, str]]:
-         """
-         Parse a gRPC response for one or more images. The response can have two possible shapes:
-           - (3,) for batch_size=1
-           - (3, n) for batch_size=n
-
-         In either case:
-           response[0, i]: byte string containing bounding box data
-           response[1, i]: byte string containing text prediction data
-           response[2, i]: (Optional) additional data/metadata (ignored here)
-
-         Parameters
-         ----------
-         response : np.ndarray
-             The raw NumPy array from gRPC. Expected shape: (3,) or (3, n).
-         table_content_format : str
-             The format of the output text content, e.g. 'simple' or 'pseudo_markdown'.
-         dims : list of dict, optional
-             A list of dict for each corresponding image, used for bounding box scaling.
-
-         Returns
-         -------
-         list of (str, str)
-             A list of (content, table_content_format) for each image.
-
-         Raises
-         ------
-         ValueError
-             If the response is not a NumPy array or has an unexpected shape,
-             or if the `table_content_format` is unrecognized.
-         """
-         if not isinstance(response, np.ndarray):
-             raise ValueError("Unexpected response format: response is not a NumPy array.")
-
-         # If we have shape (3,), convert to (3, 1)
-         if response.ndim == 1 and response.shape == (3,):
-             response = response.reshape(3, 1)
-         elif response.ndim != 2 or response.shape[0] != 3:
-             raise ValueError(f"Unexpected response shape: {response.shape}. Expecting (3,) or (3, n).")
-
-         batch_size = response.shape[1]
-         results: List[Tuple[str, str]] = []
-
-         for i in range(batch_size):
-             # 1) Parse bounding boxes
-             bboxes_bytestr: bytes = response[0, i]
-             bounding_boxes = json.loads(bboxes_bytestr.decode("utf8"))
-
-             # 2) Parse text predictions
-             texts_bytestr: bytes = response[1, i]
-             text_predictions = json.loads(texts_bytestr.decode("utf8"))
-
-             # 3) Log the third element (extra data/metadata) if needed
-             extra_data_bytestr: bytes = response[2, i]
-             logger.debug(f"Ignoring extra_data for image {i}: {extra_data_bytestr}")
-
-             # Some gRPC responses nest single-item lists; flatten them if needed
-             if isinstance(bounding_boxes, list) and len(bounding_boxes) == 1:
-                 bounding_boxes = bounding_boxes[0]
-             if isinstance(text_predictions, list) and len(text_predictions) == 1:
-                 text_predictions = text_predictions[0]
-
-             bounding_boxes, text_predictions = self._postprocess_paddle_response(
-                 bounding_boxes,
-                 text_predictions,
-                 dimensions,
-                 img_index=i,
-             )
-
-             results.append([bounding_boxes, text_predictions])
-
-         return results
-
-     @staticmethod
-     def _postprocess_paddle_response(
-         bounding_boxes: List[Any],
-         text_predictions: List[str],
-         dims: Optional[List[Dict[str, Any]]] = None,
-         img_index: int = 0,
-     ) -> Tuple[List[Any], List[str]]:
-         """
-         Convert bounding boxes with normalized coordinates to pixel coordinates by using
-         the dimensions. Also shift the coordinates if the inputs were padded. For multiple images,
-         the correct image dimensions (height, width) are retrieved from `dims[img_index]`.
-
-         Parameters
-         ----------
-         bounding_boxes : list of Any
-             A list (per line of text) of bounding boxes, each a list of (x, y) points.
-         text_predictions : list of str
-             A list of text predictions, one for each bounding box.
-         img_index : int, optional
-             The index of the image for which bounding boxes are being converted. Default is 0.
-         dims : list of dict, optional
-             A list of dictionaries, where each dictionary contains image-specific dimensions
-             and scaling information:
-             - "new_width" (int): The width of the image after processing.
-             - "new_height" (int): The height of the image after processing.
-             - "pad_width" (int, optional): The width of padding added to the image.
-             - "pad_height" (int, optional): The height of padding added to the image.
-             - "scale_factor" (float, optional): The scaling factor applied to the image.
-
-         Returns
-         -------
-         Tuple[List[Any], List[str]]
-             Bounding boxes scaled back to the original dimensions and detected text lines.
-
-         Notes
-         -----
-         - If `dims` is None or `img_index` is out of range, bounding boxes will not be scaled properly.
-         """
-         # Default to no scaling if dims are missing or out of range
-         if not dims:
-             raise ValueError("No image_dims provided.")
-         else:
-             if img_index >= len(dims):
-                 logger.warning("Image index out of range for stored dimensions. Using first image dims by default.")
-                 img_index = 0
-
-             max_width = dims[img_index]["new_width"]
-             max_height = dims[img_index]["new_height"]
-             pad_width = dims[img_index].get("pad_width", 0)
-             pad_height = dims[img_index].get("pad_height", 0)
-             scale_factor = dims[img_index].get("scale_factor", 1.0)
-
-         bboxes: List[List[float]] = []
-         texts: List[str] = []
-
-         # Convert normalized coords back to actual pixel coords
-         for box, txt in zip(bounding_boxes, text_predictions):
-             if box == "nan":
-                 continue
-             points: List[List[float]] = []
-             for point in box:
-                 # Convert normalized coords back to actual pixel coords,
-                 # and shift them back to their original positions if padded.
-                 x_pixels = float(point[0]) * max_width - pad_width
-                 y_pixels = float(point[1]) * max_height - pad_height
-                 x_original = x_pixels / scale_factor
-                 y_original = y_pixels / scale_factor
-                 points.append([x_original, y_original])
-             bboxes.append(points)
-             texts.append(txt)
-
-         return bboxes, texts
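
For reviewers gauging the impact of this removal, the following sketch shows one way the deleted PaddleOCRModelInterface was typically driven over HTTP in the 2025.3.27 layout: decode base64 inputs, build batched payloads with format_input, post each payload, and parse the responses. The endpoint URL and the post_payload helper are illustrative assumptions for this sketch, not part of nv-ingest-api.

```python
# Illustrative sketch only: exercises the removed PaddleOCRModelInterface as it
# existed in 2025.3.27.dev20250327. The endpoint URL and post_payload helper are
# assumptions for demonstration, not part of the nv-ingest-api package.
import base64

import requests

from nv_ingest_api.internal.primitives.nim.model_interface.paddle import PaddleOCRModelInterface


def post_payload(payload, endpoint="http://localhost:8000/v1/infer"):
    # Hypothetical HTTP helper; substitute the real PaddleOCR NIM endpoint URL.
    resp = requests.post(endpoint, json=payload, timeout=60)
    resp.raise_for_status()
    return resp.json()


def ocr_images(image_paths, max_batch_size=8):
    # Base64-encode the inputs, as prepare_data_for_inference expects.
    b64_images = []
    for path in image_paths:
        with open(path, "rb") as f:
            b64_images.append(base64.b64encode(f.read()).decode("utf-8"))

    model = PaddleOCRModelInterface()
    data = model.prepare_data_for_inference({"base64_images": b64_images})

    # format_input returns (batches, batch_data): HTTP payloads plus per-batch
    # scratch dicts ("image_arrays", "image_dims") used while parsing responses.
    batches, batch_data = model.format_input(data, protocol="http", max_batch_size=max_batch_size)

    results = []
    for payload, scratch in zip(batches, batch_data):
        parsed = model.parse_output(post_payload(payload), protocol="http", data=scratch)
        # parse_output yields one [bounding_boxes, text_predictions] entry per image.
        results.extend(model.process_inference_results(parsed))
    return results
```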