nv-ingest-api 2025.5.13.dev20250513__py3-none-any.whl → 2025.5.14.dev20250514__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. Click here for more details.
- nv_ingest_api/internal/extract/image/chart_extractor.py +3 -3
- nv_ingest_api/internal/extract/image/image_helpers/common.py +1 -1
- nv_ingest_api/internal/extract/image/infographic_extractor.py +1 -1
- nv_ingest_api/internal/extract/image/table_extractor.py +2 -2
- nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +1 -1
- nv_ingest_api/internal/extract/pdf/engines/pdfium.py +1 -1
- nv_ingest_api/util/image_processing/processing.py +1 -1
- nv_ingest_api/util/pdf/pdfium.py +1 -1
- {nv_ingest_api-2025.5.13.dev20250513.dist-info → nv_ingest_api-2025.5.14.dev20250514.dist-info}/METADATA +1 -1
- {nv_ingest_api-2025.5.13.dev20250513.dist-info → nv_ingest_api-2025.5.14.dev20250514.dist-info}/RECORD +13 -13
- {nv_ingest_api-2025.5.13.dev20250513.dist-info → nv_ingest_api-2025.5.14.dev20250514.dist-info}/WHEEL +1 -1
- {nv_ingest_api-2025.5.13.dev20250513.dist-info → nv_ingest_api-2025.5.14.dev20250514.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-2025.5.13.dev20250513.dist-info → nv_ingest_api-2025.5.14.dev20250514.dist-info}/top_level.txt +0 -0
|
@@ -27,7 +27,7 @@ from nv_ingest_api.util.nim import create_inference_client
|
|
|
27
27
|
PADDLE_MIN_WIDTH = 32
|
|
28
28
|
PADDLE_MIN_HEIGHT = 32
|
|
29
29
|
|
|
30
|
-
logger = logging.getLogger(f"
|
|
30
|
+
logger = logging.getLogger(f"ray.{__name__}")
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
def _filter_valid_chart_images(
|
|
@@ -80,7 +80,7 @@ def _run_chart_inference(
|
|
|
80
80
|
yolox_client.infer,
|
|
81
81
|
data=data_yolox,
|
|
82
82
|
model_name="yolox",
|
|
83
|
-
stage_name="
|
|
83
|
+
stage_name="chart_extraction",
|
|
84
84
|
max_batch_size=8,
|
|
85
85
|
trace_info=trace_info,
|
|
86
86
|
)
|
|
@@ -88,7 +88,7 @@ def _run_chart_inference(
|
|
|
88
88
|
paddle_client.infer,
|
|
89
89
|
data=data_paddle,
|
|
90
90
|
model_name="paddle",
|
|
91
|
-
stage_name="
|
|
91
|
+
stage_name="chart_extraction",
|
|
92
92
|
max_batch_size=1 if paddle_client.protocol == "grpc" else 2,
|
|
93
93
|
trace_info=trace_info,
|
|
94
94
|
)
|
|
@@ -223,7 +223,7 @@ def extract_page_elements_from_images(
|
|
|
223
223
|
model_name="yolox",
|
|
224
224
|
max_batch_size=YOLOX_MAX_BATCH_SIZE,
|
|
225
225
|
trace_info=trace_info,
|
|
226
|
-
stage_name="
|
|
226
|
+
stage_name="pdf_extraction",
|
|
227
227
|
)
|
|
228
228
|
|
|
229
229
|
# Process each result along with its corresponding image.
|
|
@@ -100,7 +100,7 @@ def _update_infographic_metadata(
|
|
|
100
100
|
paddle_results = paddle_client.infer(
|
|
101
101
|
data=data_paddle,
|
|
102
102
|
model_name="paddle",
|
|
103
|
-
stage_name="
|
|
103
|
+
stage_name="infographic_extraction",
|
|
104
104
|
max_batch_size=1 if paddle_client.protocol == "grpc" else 2,
|
|
105
105
|
trace_info=trace_info,
|
|
106
106
|
)
|
|
@@ -81,7 +81,7 @@ def _run_inference(
|
|
|
81
81
|
yolox_client.infer,
|
|
82
82
|
data=data_yolox,
|
|
83
83
|
model_name="yolox",
|
|
84
|
-
stage_name="
|
|
84
|
+
stage_name="table_extraction",
|
|
85
85
|
max_batch_size=8,
|
|
86
86
|
trace_info=trace_info,
|
|
87
87
|
)
|
|
@@ -89,7 +89,7 @@ def _run_inference(
|
|
|
89
89
|
paddle_client.infer,
|
|
90
90
|
data=data_paddle,
|
|
91
91
|
model_name="paddle",
|
|
92
|
-
stage_name="
|
|
92
|
+
stage_name="table_extraction",
|
|
93
93
|
max_batch_size=1 if paddle_client.protocol == "grpc" else 2,
|
|
94
94
|
trace_info=trace_info,
|
|
95
95
|
)
|
|
@@ -466,7 +466,7 @@ def _extract_text_and_bounding_boxes(
|
|
|
466
466
|
inference_results = nemoretriever_parse_client.infer(
|
|
467
467
|
data=data,
|
|
468
468
|
model_name="nemoretriever_parse",
|
|
469
|
-
stage_name="
|
|
469
|
+
stage_name="pdf_extraction",
|
|
470
470
|
max_batch_size=NEMORETRIEVER_PARSE_MAX_BATCH_SIZE,
|
|
471
471
|
execution_trace_log=execution_trace_log,
|
|
472
472
|
)
|
|
@@ -105,7 +105,7 @@ def _extract_page_elements_using_image_ensemble(
|
|
|
105
105
|
model_name="yolox",
|
|
106
106
|
max_batch_size=YOLOX_MAX_BATCH_SIZE,
|
|
107
107
|
trace_info=execution_trace_log,
|
|
108
|
-
stage_name="
|
|
108
|
+
stage_name="pdf_extraction",
|
|
109
109
|
)
|
|
110
110
|
|
|
111
111
|
# Process results: iterate over each image's inference output.
|
|
@@ -150,7 +150,7 @@ def extract_tables_and_charts_yolox(
|
|
|
150
150
|
min_score=YOLOX_MIN_SCORE,
|
|
151
151
|
final_thresh=YOLOX_FINAL_SCORE,
|
|
152
152
|
trace_info=trace_info,
|
|
153
|
-
stage_name="
|
|
153
|
+
stage_name="pdf_extraction",
|
|
154
154
|
)
|
|
155
155
|
|
|
156
156
|
# Process results: iterate over each image's inference output.
|
nv_ingest_api/util/pdf/pdfium.py
CHANGED
|
@@ -119,7 +119,7 @@ def pdfium_try_get_bitmap_as_numpy(image_obj) -> np.ndarray:
|
|
|
119
119
|
return img_array
|
|
120
120
|
|
|
121
121
|
|
|
122
|
-
@traceable_func(trace_name="
|
|
122
|
+
@traceable_func(trace_name="pdf_extraction::pdfium_pages_to_numpy")
|
|
123
123
|
def pdfium_pages_to_numpy(
|
|
124
124
|
pages: List[pdfium.PdfPage],
|
|
125
125
|
render_dpi: int = 300,
|
|
@@ -18,19 +18,19 @@ nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py,sha25
|
|
|
18
18
|
nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py,sha256=1wkciAxu8lz9WuPuoleJFy2s09ieSzXl1S71F9r0BWA,4385
|
|
19
19
|
nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py,sha256=CM2yV8lfEw1F1ORAjupD4gyIKX0PDDJrL3nsZ5Mnrgg,31539
|
|
20
20
|
nv_ingest_api/internal/extract/image/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
21
|
-
nv_ingest_api/internal/extract/image/chart_extractor.py,sha256=
|
|
21
|
+
nv_ingest_api/internal/extract/image/chart_extractor.py,sha256=CkaW8ihPmGMQGrZh0ih14gtEpWuGOJ8InPQfZwpsP2g,13300
|
|
22
22
|
nv_ingest_api/internal/extract/image/image_extractor.py,sha256=4tUWinuFMN3ukWa2tZa2_LtzRiTyUAUCBF6BDkUEvm0,8705
|
|
23
|
-
nv_ingest_api/internal/extract/image/infographic_extractor.py,sha256=
|
|
24
|
-
nv_ingest_api/internal/extract/image/table_extractor.py,sha256=
|
|
23
|
+
nv_ingest_api/internal/extract/image/infographic_extractor.py,sha256=yc9b2q_Ea08CEVclZ47UkpU4F7qlakPuU3UV9P013W0,8903
|
|
24
|
+
nv_ingest_api/internal/extract/image/table_extractor.py,sha256=ivHaJxYjeHvFM1PZIpxVabPadxtcTsu51j398ZjMhD4,13123
|
|
25
25
|
nv_ingest_api/internal/extract/image/image_helpers/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
26
|
-
nv_ingest_api/internal/extract/image/image_helpers/common.py,sha256=
|
|
26
|
+
nv_ingest_api/internal/extract/image/image_helpers/common.py,sha256=P8rcl4YPyeWeMJg7u1yejD3k9EnDVEbJgfYEnJ4WO5c,15025
|
|
27
27
|
nv_ingest_api/internal/extract/pdf/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
28
28
|
nv_ingest_api/internal/extract/pdf/pdf_extractor.py,sha256=CxtWaD6mql9MEqSdk2CfSQ9T-Bn87beBkCOuGGjxGt8,2934
|
|
29
29
|
nv_ingest_api/internal/extract/pdf/engines/__init__.py,sha256=u4GnAZmDKRl0RwYGIRiozIRw70Kybw3A72-lcKFeoTI,582
|
|
30
30
|
nv_ingest_api/internal/extract/pdf/engines/adobe.py,sha256=VT0dEqkU-y2uGkaCqxtKYov_Q8R1028UQVBchgMLca4,17466
|
|
31
31
|
nv_ingest_api/internal/extract/pdf/engines/llama.py,sha256=PpKTqS8jGHBV6mKLGZWwjpfT8ga6Fy8ffrvL-gPAf2c,8182
|
|
32
|
-
nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py,sha256=
|
|
33
|
-
nv_ingest_api/internal/extract/pdf/engines/pdfium.py,sha256=
|
|
32
|
+
nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py,sha256=Uqj1NH7yWga9P6_vCzgny1WKALfF--UdAaGHUF8K_aQ,22926
|
|
33
|
+
nv_ingest_api/internal/extract/pdf/engines/pdfium.py,sha256=fDbrZwJ-lgeHYOq107WXehzdSvyF8zEDza_9UkDm5aE,22360
|
|
34
34
|
nv_ingest_api/internal/extract/pdf/engines/tika.py,sha256=6GyR2l6EsgNZl9jnYDXLeKNK9Fj2Mw9y2UWDq-eSkOc,3169
|
|
35
35
|
nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py,sha256=jrv2B4VZAH4PevAQrFz965qz8UyXq3rViiOTbGLejec,14908
|
|
36
36
|
nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py,sha256=Jk3wrQ2CZs167juvEZ-uV6qXWQjR08hhIu8otk2MWj4,4931
|
|
@@ -118,7 +118,7 @@ nv_ingest_api/util/exception_handlers/pdf.py,sha256=FUC41QJKDCfiTv-1c1_8Isxwt1xM
|
|
|
118
118
|
nv_ingest_api/util/exception_handlers/schemas.py,sha256=NJngVNf9sk5Uz6CFFfkNO_LBAMt2QZUcMYGxX64oYRk,2179
|
|
119
119
|
nv_ingest_api/util/image_processing/__init__.py,sha256=Jiy8C1ZuSrNb_eBM1ZTV9IKFIsnjhZi6Ku3JJhVLimA,104
|
|
120
120
|
nv_ingest_api/util/image_processing/clustering.py,sha256=sUGlZI4cx1q8h4Pns1N9JVpdfSM2BOH8zRmn9QFCtzI,9236
|
|
121
|
-
nv_ingest_api/util/image_processing/processing.py,sha256=
|
|
121
|
+
nv_ingest_api/util/image_processing/processing.py,sha256=LSoDDEmahr7a-qSS12McVcowRe3dOrAZwa1h-PD_JPQ,6554
|
|
122
122
|
nv_ingest_api/util/image_processing/table_and_chart.py,sha256=bxOu9PZYkG_WFCDGw_JLaO60S2pDSN8EOWK3xkIwr2A,14376
|
|
123
123
|
nv_ingest_api/util/image_processing/transforms.py,sha256=Kz9hrizV314Hy7cRCYK9ZmhmBbVUOZ_z0HEpzZYcslQ,14081
|
|
124
124
|
nv_ingest_api/util/logging/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -134,7 +134,7 @@ nv_ingest_api/util/multi_processing/__init__.py,sha256=4fojP8Rp_5Hu1YAkqGylqTyEZ
|
|
|
134
134
|
nv_ingest_api/util/multi_processing/mp_pool_singleton.py,sha256=dTfP82DgGPaXEJH3jywTO8rNlLZUniD4FFzwv84_giE,7372
|
|
135
135
|
nv_ingest_api/util/nim/__init__.py,sha256=UqbiXFCqjWcjNvoduXd_0gOUOGBT8JvppiYHOmMyneA,1775
|
|
136
136
|
nv_ingest_api/util/pdf/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
|
|
137
|
-
nv_ingest_api/util/pdf/pdfium.py,sha256=
|
|
137
|
+
nv_ingest_api/util/pdf/pdfium.py,sha256=Ch9Gh5jRLcBr3stjCckqWwTUL-T0sI50PlQnZHo_9NA,15761
|
|
138
138
|
nv_ingest_api/util/schema/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
139
139
|
nv_ingest_api/util/schema/schema_validator.py,sha256=H0yZ_i_HZaiBRUCGmTBfRB9-hURhVqyd10aS_ynM1_0,321
|
|
140
140
|
nv_ingest_api/util/service_clients/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
@@ -147,8 +147,8 @@ nv_ingest_api/util/service_clients/rest/rest_client.py,sha256=dZ-jrk7IK7oNtHoXFS
|
|
|
147
147
|
nv_ingest_api/util/string_processing/__init__.py,sha256=mkwHthyS-IILcLcL1tJYeF6mpqX3pxEw5aUzDGjTSeU,1411
|
|
148
148
|
nv_ingest_api/util/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
149
149
|
nv_ingest_api/util/system/hardware_info.py,sha256=JGxBbF3kvgYbwhhWvtjNzPxVZQV_npmsordAioBrglo,19252
|
|
150
|
-
nv_ingest_api-2025.5.
|
|
151
|
-
nv_ingest_api-2025.5.
|
|
152
|
-
nv_ingest_api-2025.5.
|
|
153
|
-
nv_ingest_api-2025.5.
|
|
154
|
-
nv_ingest_api-2025.5.
|
|
150
|
+
nv_ingest_api-2025.5.14.dev20250514.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
151
|
+
nv_ingest_api-2025.5.14.dev20250514.dist-info/METADATA,sha256=ovvmVcwxqjyt5DQyPur4gOwFTmMRNiB-0S0lVN9v9OU,13889
|
|
152
|
+
nv_ingest_api-2025.5.14.dev20250514.dist-info/WHEEL,sha256=QZxptf4Y1BKFRCEDxD4h2V0mBFQOVFLFEpvxHmIs52A,91
|
|
153
|
+
nv_ingest_api-2025.5.14.dev20250514.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
|
|
154
|
+
nv_ingest_api-2025.5.14.dev20250514.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|