nv-ingest-api 2025.5.14.dev20250514__py3-none-any.whl → 2025.5.16.dev20250516__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. Click here for more details.
- nv_ingest_api/internal/extract/pptx/pptx_extractor.py +5 -8
- nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +1 -1
- nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +1 -1
- nv_ingest_api/internal/schemas/extract/extract_table_schema.py +1 -1
- nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +26 -12
- nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +35 -24
- nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +11 -10
- nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +9 -7
- nv_ingest_api/internal/store/image_upload.py +1 -0
- nv_ingest_api/util/__init__.py +3 -0
- nv_ingest_api/util/schema/__init__.py +3 -0
- nv_ingest_api/util/service_clients/redis/__init__.py +3 -0
- nv_ingest_api/util/system/hardware_info.py +4 -0
- {nv_ingest_api-2025.5.14.dev20250514.dist-info → nv_ingest_api-2025.5.16.dev20250516.dist-info}/METADATA +1 -1
- {nv_ingest_api-2025.5.14.dev20250514.dist-info → nv_ingest_api-2025.5.16.dev20250516.dist-info}/RECORD +18 -18
- {nv_ingest_api-2025.5.14.dev20250514.dist-info → nv_ingest_api-2025.5.16.dev20250516.dist-info}/WHEEL +1 -1
- {nv_ingest_api-2025.5.14.dev20250514.dist-info → nv_ingest_api-2025.5.16.dev20250516.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-2025.5.14.dev20250514.dist-info → nv_ingest_api-2025.5.16.dev20250516.dist-info}/top_level.txt +0 -0
|
@@ -99,14 +99,11 @@ def _decode_and_extract_from_pptx(
|
|
|
99
99
|
|
|
100
100
|
# Retrieve extraction parameters (and remove boolean flags as they are consumed).
|
|
101
101
|
extract_params: Dict[str, Any] = prepared_task_props.get("params", {})
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
extract_infographics: bool = extract_params.pop("extract_infographics", False)
|
|
108
|
-
except KeyError as e:
|
|
109
|
-
raise ValueError(f"Missing required extraction flag: {e}")
|
|
102
|
+
extract_text: bool = extract_params.pop("extract_text", False)
|
|
103
|
+
extract_images: bool = extract_params.pop("extract_images", False)
|
|
104
|
+
extract_tables: bool = extract_params.pop("extract_tables", False)
|
|
105
|
+
extract_charts: bool = extract_params.pop("extract_charts", False)
|
|
106
|
+
extract_infographics: bool = extract_params.pop("extract_infographics", False)
|
|
110
107
|
|
|
111
108
|
# Inject additional configuration and trace information.
|
|
112
109
|
if getattr(extraction_config, "pptx_extraction_config", None) is not None:
|
|
@@ -129,7 +129,7 @@ class ChartExtractorSchema(BaseModel):
|
|
|
129
129
|
@field_validator("max_queue_size", "n_workers")
|
|
130
130
|
def check_positive(cls, v, field):
|
|
131
131
|
if v <= 0:
|
|
132
|
-
raise ValueError(f"{field.field_name} must be greater than
|
|
132
|
+
raise ValueError(f"{field.field_name} must be greater than 0.")
|
|
133
133
|
return v
|
|
134
134
|
|
|
135
135
|
model_config = ConfigDict(extra="forbid")
|
|
@@ -122,7 +122,7 @@ class InfographicExtractorSchema(BaseModel):
|
|
|
122
122
|
@field_validator("max_queue_size", "n_workers")
|
|
123
123
|
def check_positive(cls, v, field):
|
|
124
124
|
if v <= 0:
|
|
125
|
-
raise ValueError(f"{field.field_name} must be greater than
|
|
125
|
+
raise ValueError(f"{field.field_name} must be greater than 0.")
|
|
126
126
|
return v
|
|
127
127
|
|
|
128
128
|
model_config = ConfigDict(extra="forbid")
|
|
@@ -122,7 +122,7 @@ class TableExtractorSchema(BaseModel):
|
|
|
122
122
|
@field_validator("max_queue_size", "n_workers")
|
|
123
123
|
def check_positive(cls, v, field):
|
|
124
124
|
if v <= 0:
|
|
125
|
-
raise ValueError(f"{field.field_name} must be greater than
|
|
125
|
+
raise ValueError(f"{field.field_name} must be greater than 0.")
|
|
126
126
|
return v
|
|
127
127
|
|
|
128
128
|
endpoint_config: Optional[TableExtractorConfigSchema] = None
|
|
@@ -2,22 +2,36 @@
|
|
|
2
2
|
# All rights reserved.
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
from typing import Optional, Literal, Annotated
|
|
5
7
|
|
|
6
|
-
from typing import Optional, Literal
|
|
7
8
|
|
|
8
|
-
|
|
9
|
-
|
|
9
|
+
class MessageBrokerClientSchema(BaseModel):
|
|
10
|
+
"""
|
|
11
|
+
Configuration schema for message broker client connections.
|
|
12
|
+
Supports Redis or simple in-memory clients.
|
|
13
|
+
"""
|
|
10
14
|
|
|
15
|
+
host: str = Field(default="redis", description="Hostname of the broker service.")
|
|
11
16
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
17
|
+
port: Annotated[int, Field(gt=0, lt=65536)] = Field(
|
|
18
|
+
default=6379, description="Port to connect to. Must be between 1 and 65535."
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
client_type: Literal["redis", "simple"] = Field(
|
|
22
|
+
default="redis", description="Type of broker client. Supported values: 'redis', 'simple'."
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
broker_params: Optional[dict] = Field(
|
|
26
|
+
default_factory=dict, description="Optional parameters passed to the broker client."
|
|
27
|
+
)
|
|
15
28
|
|
|
16
|
-
|
|
17
|
-
|
|
29
|
+
connection_timeout: Annotated[int, Field(ge=0)] = Field(
|
|
30
|
+
default=300, description="Connection timeout in seconds. Must be >= 0."
|
|
31
|
+
)
|
|
18
32
|
|
|
19
|
-
|
|
33
|
+
max_backoff: Annotated[int, Field(ge=0)] = Field(
|
|
34
|
+
default=300, description="Maximum backoff time in seconds. Must be >= 0."
|
|
35
|
+
)
|
|
20
36
|
|
|
21
|
-
|
|
22
|
-
max_backoff: Optional[Annotated[int, Field(ge=0)]] = 300
|
|
23
|
-
max_retries: Optional[Annotated[int, Field(ge=0)]] = 0
|
|
37
|
+
max_retries: Annotated[int, Field(ge=0)] = Field(default=0, description="Maximum number of retries. Must be >= 0.")
|
|
@@ -104,7 +104,7 @@ class IngestTaskDedupSchema(BaseModelNoExt):
|
|
|
104
104
|
|
|
105
105
|
class IngestTaskEmbedSchema(BaseModelNoExt):
|
|
106
106
|
endpoint_url: Optional[str] = None
|
|
107
|
-
|
|
107
|
+
model_name: Optional[str] = None
|
|
108
108
|
api_key: Optional[str] = None
|
|
109
109
|
filter_errors: bool = False
|
|
110
110
|
|
|
@@ -160,29 +160,40 @@ class IngestTaskSchema(BaseModelNoExt):
|
|
|
160
160
|
@model_validator(mode="before")
|
|
161
161
|
@classmethod
|
|
162
162
|
def check_task_properties_type(cls, values):
|
|
163
|
-
task_type
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
TaskTypeEnum
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
163
|
+
task_type = values.get("type")
|
|
164
|
+
task_properties = values.get("task_properties", {})
|
|
165
|
+
|
|
166
|
+
# Ensure task_type is lowercased and converted to enum early
|
|
167
|
+
if isinstance(task_type, str):
|
|
168
|
+
task_type = task_type.lower()
|
|
169
|
+
try:
|
|
170
|
+
task_type = TaskTypeEnum(task_type)
|
|
171
|
+
except ValueError:
|
|
172
|
+
raise ValueError(f"{task_type} is not a valid TaskTypeEnum value")
|
|
173
|
+
|
|
174
|
+
task_type_to_schema = {
|
|
175
|
+
TaskTypeEnum.CAPTION: IngestTaskCaptionSchema,
|
|
176
|
+
TaskTypeEnum.DEDUP: IngestTaskDedupSchema,
|
|
177
|
+
TaskTypeEnum.EMBED: IngestTaskEmbedSchema,
|
|
178
|
+
TaskTypeEnum.EXTRACT: IngestTaskExtractSchema,
|
|
179
|
+
TaskTypeEnum.FILTER: IngestTaskFilterSchema,
|
|
180
|
+
TaskTypeEnum.SPLIT: IngestTaskSplitSchema,
|
|
181
|
+
TaskTypeEnum.STORE_EMBEDDING: IngestTaskStoreEmbedSchema,
|
|
182
|
+
TaskTypeEnum.STORE: IngestTaskStoreSchema,
|
|
183
|
+
TaskTypeEnum.VDB_UPLOAD: IngestTaskVdbUploadSchema,
|
|
184
|
+
TaskTypeEnum.AUDIO_DATA_EXTRACT: IngestTaskAudioExtraction,
|
|
185
|
+
TaskTypeEnum.TABLE_DATA_EXTRACT: IngestTaskTableExtraction,
|
|
186
|
+
TaskTypeEnum.CHART_DATA_EXTRACT: IngestTaskChartExtraction,
|
|
187
|
+
TaskTypeEnum.INFOGRAPHIC_DATA_EXTRACT: IngestTaskInfographicExtraction,
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
expected_schema_cls = task_type_to_schema.get(task_type)
|
|
191
|
+
if expected_schema_cls is None:
|
|
192
|
+
raise ValueError(f"Unsupported or missing task_type '{task_type}'")
|
|
193
|
+
|
|
194
|
+
validated_task_properties = expected_schema_cls(**task_properties)
|
|
195
|
+
values["type"] = task_type # ensure type is now always the enum
|
|
196
|
+
values["task_properties"] = validated_task_properties
|
|
186
197
|
return values
|
|
187
198
|
|
|
188
199
|
@field_validator("type", mode="before")
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
7
|
|
|
8
|
-
from pydantic import ConfigDict, BaseModel
|
|
8
|
+
from pydantic import ConfigDict, BaseModel, Field
|
|
9
9
|
|
|
10
10
|
from nv_ingest_api.util.logging.configuration import LogLevel
|
|
11
11
|
|
|
@@ -13,13 +13,14 @@ logger = logging.getLogger(__name__)
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class TextEmbeddingSchema(BaseModel):
|
|
16
|
-
api_key: str = "api_key"
|
|
17
|
-
batch_size: int = 4
|
|
18
|
-
embedding_model: str = "nvidia/nv-embedqa-
|
|
19
|
-
embedding_nim_endpoint: str = "http://embedding:8000/v1"
|
|
20
|
-
encoding_format: str = "float"
|
|
21
|
-
httpx_log_level: LogLevel = LogLevel.WARNING
|
|
22
|
-
input_type: str = "passage"
|
|
23
|
-
raise_on_failure: bool = False
|
|
24
|
-
truncate: str = "END"
|
|
16
|
+
api_key: str = Field(default="api_key")
|
|
17
|
+
batch_size: int = Field(default=4)
|
|
18
|
+
embedding_model: str = Field(default="nvidia/llama-3.2-nv-embedqa-1b-v2")
|
|
19
|
+
embedding_nim_endpoint: str = Field(default="http://embedding:8000/v1")
|
|
20
|
+
encoding_format: str = Field(default="float")
|
|
21
|
+
httpx_log_level: LogLevel = Field(default=LogLevel.WARNING)
|
|
22
|
+
input_type: str = Field(default="passage")
|
|
23
|
+
raise_on_failure: bool = Field(default=False)
|
|
24
|
+
truncate: str = Field(default="END")
|
|
25
|
+
|
|
25
26
|
model_config = ConfigDict(extra="forbid")
|
|
@@ -2,21 +2,23 @@
|
|
|
2
2
|
# All rights reserved.
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
|
-
from pydantic import Field, BaseModel, field_validator
|
|
5
|
+
from pydantic import Field, BaseModel, field_validator, ConfigDict
|
|
6
6
|
|
|
7
7
|
from typing import Optional
|
|
8
8
|
|
|
9
|
-
from typing_extensions import Annotated
|
|
10
|
-
|
|
11
9
|
|
|
12
10
|
class TextSplitterSchema(BaseModel):
|
|
13
11
|
tokenizer: Optional[str] = None
|
|
14
|
-
chunk_size:
|
|
15
|
-
chunk_overlap:
|
|
12
|
+
chunk_size: int = Field(default=1024, gt=0)
|
|
13
|
+
chunk_overlap: int = Field(default=150, ge=0)
|
|
16
14
|
raise_on_failure: bool = False
|
|
17
15
|
|
|
18
16
|
@field_validator("chunk_overlap")
|
|
19
|
-
|
|
20
|
-
|
|
17
|
+
@classmethod
|
|
18
|
+
def check_chunk_overlap(cls, v, values):
|
|
19
|
+
chunk_size = values.data.get("chunk_size")
|
|
20
|
+
if chunk_size is not None and v >= chunk_size:
|
|
21
21
|
raise ValueError("chunk_overlap must be less than chunk_size")
|
|
22
22
|
return v
|
|
23
|
+
|
|
24
|
+
model_config = ConfigDict(extra="forbid")
|
|
@@ -116,6 +116,7 @@ def _upload_images_to_minio(df: pd.DataFrame, params: Dict[str, Any]) -> pd.Data
|
|
|
116
116
|
if "content" not in metadata:
|
|
117
117
|
logger.error("Row %s: missing 'content' in metadata", idx)
|
|
118
118
|
continue
|
|
119
|
+
|
|
119
120
|
if "source_metadata" not in metadata or not isinstance(metadata["source_metadata"], dict):
|
|
120
121
|
logger.error("Row %s: missing or invalid 'source_metadata' in metadata", idx)
|
|
121
122
|
continue
|
nv_ingest_api/util/__init__.py
CHANGED
|
@@ -45,6 +45,10 @@ class SystemResourceProbe:
|
|
|
45
45
|
A value of 0.5 suggests a hyperthread adds 50% extra performance.
|
|
46
46
|
Requires psutil to be installed and report physical cores.
|
|
47
47
|
Defaults to 0.75.
|
|
48
|
+
|
|
49
|
+
Note: the default value of 0.75 is a heuristic and may not be optimal
|
|
50
|
+
for all situations. It is where parallel pdf decomposition efficiency
|
|
51
|
+
is observed to begin rolling off.
|
|
48
52
|
"""
|
|
49
53
|
if not (0.0 <= hyperthread_weight <= 1.0):
|
|
50
54
|
raise ValueError("hyperthread_weight must be between 0.0 and 1.0")
|
|
@@ -35,7 +35,7 @@ nv_ingest_api/internal/extract/pdf/engines/tika.py,sha256=6GyR2l6EsgNZl9jnYDXLeK
|
|
|
35
35
|
nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py,sha256=jrv2B4VZAH4PevAQrFz965qz8UyXq3rViiOTbGLejec,14908
|
|
36
36
|
nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py,sha256=Jk3wrQ2CZs167juvEZ-uV6qXWQjR08hhIu8otk2MWj4,4931
|
|
37
37
|
nv_ingest_api/internal/extract/pptx/__init__.py,sha256=HIHfzSig66GT0Uk8qsGBm_f13fKYcPtItBicRUWOOVA,183
|
|
38
|
-
nv_ingest_api/internal/extract/pptx/pptx_extractor.py,sha256=
|
|
38
|
+
nv_ingest_api/internal/extract/pptx/pptx_extractor.py,sha256=o-0P2dDyRFW37uQi_lKk6-eFozTcZvbq-2Y4I0EBMIY,7749
|
|
39
39
|
nv_ingest_api/internal/extract/pptx/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
40
|
nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py,sha256=Lg2I1Zq-WJagsZibgyn__8T-M86BjkqAiXWNta9X_EU,29430
|
|
41
41
|
nv_ingest_api/internal/mutate/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
@@ -66,20 +66,20 @@ nv_ingest_api/internal/primitives/tracing/tagging.py,sha256=O5dD7Z7j43nrjqn0Axhx
|
|
|
66
66
|
nv_ingest_api/internal/schemas/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
67
67
|
nv_ingest_api/internal/schemas/extract/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
68
68
|
nv_ingest_api/internal/schemas/extract/extract_audio_schema.py,sha256=VVppZgV1lnyJCTfADexzoj3V0lOSq3t6Dw_6VhIxZ7k,3771
|
|
69
|
-
nv_ingest_api/internal/schemas/extract/extract_chart_schema.py,sha256=
|
|
69
|
+
nv_ingest_api/internal/schemas/extract/extract_chart_schema.py,sha256=iu8lHQC0zbBB9VRK7PZisAVzpeSpFqjcXRAnwZ9OzoM,4301
|
|
70
70
|
nv_ingest_api/internal/schemas/extract/extract_docx_schema.py,sha256=M2N7WjMNvSemHcJHWeNUD_kFG0wC5VE2W3K6SVrJqvA,3761
|
|
71
71
|
nv_ingest_api/internal/schemas/extract/extract_image_schema.py,sha256=GC4xV8Z9TPLOuxlEtf2fbklSSp8ETGMrDpZgMQ02UwA,3766
|
|
72
|
-
nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py,sha256=
|
|
72
|
+
nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py,sha256=rl_hFDoJaJLTKbtnEpDSBj-73KQL9aUEVKGiW0IdXiU,3991
|
|
73
73
|
nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py,sha256=G9g1lEORmryUWTzDyZ0vHAuPnVMK7VaRx0E4xzmAw3Q,6589
|
|
74
74
|
nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py,sha256=5dT0kv-Mmpe5KW-BZc1JOW3rUlgzVZI0rpB79NWytmw,3761
|
|
75
|
-
nv_ingest_api/internal/schemas/extract/extract_table_schema.py,sha256=
|
|
75
|
+
nv_ingest_api/internal/schemas/extract/extract_table_schema.py,sha256=sbt3TvQrLsXc8-muKnsyOs4MfpA4VzrprYHdu1IrY8M,3950
|
|
76
76
|
nv_ingest_api/internal/schemas/message_brokers/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
|
|
77
|
-
nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py,sha256=
|
|
77
|
+
nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py,sha256=4xTSFE_vH7yZE9RRJRflFAG9hNXIaF6K020M_xA7ylw,1351
|
|
78
78
|
nv_ingest_api/internal/schemas/message_brokers/request_schema.py,sha256=LZX_wXDxTamVFqTQs2Yd8uvWyPE5mddHAWSU4PtfEIQ,966
|
|
79
79
|
nv_ingest_api/internal/schemas/message_brokers/response_schema.py,sha256=4b275HlzBSzpmuE2wdoeaGKPCdKki3wuWldtRIfrj8w,727
|
|
80
80
|
nv_ingest_api/internal/schemas/meta/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
81
81
|
nv_ingest_api/internal/schemas/meta/base_model_noext.py,sha256=8hXU1uuiqZ6t8EsoZ8vlC5EFf2zSZrKEX133FcfZMwI,316
|
|
82
|
-
nv_ingest_api/internal/schemas/meta/ingest_job_schema.py,sha256=
|
|
82
|
+
nv_ingest_api/internal/schemas/meta/ingest_job_schema.py,sha256=pkMOKIyfAmVcimqZ-zDXngW_lzYxdXYHv8C8cdduUvA,8083
|
|
83
83
|
nv_ingest_api/internal/schemas/meta/metadata_schema.py,sha256=_FAE-yeb01hxq05SXrV3NLM4DPUPSfnIbH6ZMliWsEg,6625
|
|
84
84
|
nv_ingest_api/internal/schemas/mutate/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
85
85
|
nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py,sha256=k1JOdlPPpsipc0XhHf-9YxJ_-W0HvpVE1ZhYmr7fzj0,395
|
|
@@ -89,16 +89,16 @@ nv_ingest_api/internal/schemas/store/store_image_schema.py,sha256=p2LGij9i6sG6RY
|
|
|
89
89
|
nv_ingest_api/internal/schemas/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
90
90
|
nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py,sha256=xLxXJsm8QeaL7KPe7m5sP2rd_AuNRMX29rdeVdoei3Y,582
|
|
91
91
|
nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py,sha256=31ThI5fr0yyENeJeE1xMAA-pxk1QVJLwM842zMate_k,429
|
|
92
|
-
nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py,sha256=
|
|
93
|
-
nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py,sha256=
|
|
92
|
+
nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py,sha256=ongmHkJA2953f9_RI7ZYzf5BUnFzVL6Al5E8WKyfgw4,885
|
|
93
|
+
nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py,sha256=D9K8tvu-tkEBQkZo7uuRzgrHdGyM3ZcNycHbHy5HV2E,791
|
|
94
94
|
nv_ingest_api/internal/store/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
95
95
|
nv_ingest_api/internal/store/embed_text_upload.py,sha256=maxb4FPsBvWgvlrjAPEBlRZEFdJX5NxPG-p8kUbzV7I,9898
|
|
96
|
-
nv_ingest_api/internal/store/image_upload.py,sha256=
|
|
96
|
+
nv_ingest_api/internal/store/image_upload.py,sha256=GNlY4k3pfcHv3lzXxkbmGLeHFsf9PI25bkBn6Xn9h3I,9654
|
|
97
97
|
nv_ingest_api/internal/transform/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
98
98
|
nv_ingest_api/internal/transform/caption_image.py,sha256=RYL_b26zfaRlbHz0XvLw9HwaMlXpNhr7gayjxGzdALQ,8545
|
|
99
99
|
nv_ingest_api/internal/transform/embed_text.py,sha256=F8kg-WXihtuUMwDQUUYjnfGDCdQp1Mkd-jeThOiJT0s,16507
|
|
100
100
|
nv_ingest_api/internal/transform/split_text.py,sha256=y6NYRkCEVpVsDu-AqrKx2D6JPp1vwxclw9obNZNJIIs,6561
|
|
101
|
-
nv_ingest_api/util/__init__.py,sha256=
|
|
101
|
+
nv_ingest_api/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
102
102
|
nv_ingest_api/util/control_message/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
103
103
|
nv_ingest_api/util/control_message/validators.py,sha256=KvvbyheJ5rbzvJbH9JKpMR9VfoI0b0uM6eTAZte1p44,1315
|
|
104
104
|
nv_ingest_api/util/converters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -135,20 +135,20 @@ nv_ingest_api/util/multi_processing/mp_pool_singleton.py,sha256=dTfP82DgGPaXEJH3
|
|
|
135
135
|
nv_ingest_api/util/nim/__init__.py,sha256=UqbiXFCqjWcjNvoduXd_0gOUOGBT8JvppiYHOmMyneA,1775
|
|
136
136
|
nv_ingest_api/util/pdf/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
|
|
137
137
|
nv_ingest_api/util/pdf/pdfium.py,sha256=Ch9Gh5jRLcBr3stjCckqWwTUL-T0sI50PlQnZHo_9NA,15761
|
|
138
|
-
nv_ingest_api/util/schema/__init__.py,sha256=
|
|
138
|
+
nv_ingest_api/util/schema/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
139
139
|
nv_ingest_api/util/schema/schema_validator.py,sha256=H0yZ_i_HZaiBRUCGmTBfRB9-hURhVqyd10aS_ynM1_0,321
|
|
140
140
|
nv_ingest_api/util/service_clients/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
141
141
|
nv_ingest_api/util/service_clients/client_base.py,sha256=eCOeq3Rr6Xnnsh-oHszYlQTOffQyzsT8s43V4V8H_h8,2716
|
|
142
142
|
nv_ingest_api/util/service_clients/kafka/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1IYX6XlZrTfx6T1cIhdILwG8,140
|
|
143
|
-
nv_ingest_api/util/service_clients/redis/__init__.py,sha256=
|
|
143
|
+
nv_ingest_api/util/service_clients/redis/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
144
144
|
nv_ingest_api/util/service_clients/redis/redis_client.py,sha256=3NLecvIvVN1v-sA7d7G-_f6qJVZyfJE2H8Iu5KG3Aew,37417
|
|
145
145
|
nv_ingest_api/util/service_clients/rest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
146
146
|
nv_ingest_api/util/service_clients/rest/rest_client.py,sha256=dZ-jrk7IK7oNtHoXFSNTf7psoOpLREiLN5ezpHFW0HI,21732
|
|
147
147
|
nv_ingest_api/util/string_processing/__init__.py,sha256=mkwHthyS-IILcLcL1tJYeF6mpqX3pxEw5aUzDGjTSeU,1411
|
|
148
148
|
nv_ingest_api/util/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
149
|
-
nv_ingest_api/util/system/hardware_info.py,sha256=
|
|
150
|
-
nv_ingest_api-2025.5.
|
|
151
|
-
nv_ingest_api-2025.5.
|
|
152
|
-
nv_ingest_api-2025.5.
|
|
153
|
-
nv_ingest_api-2025.5.
|
|
154
|
-
nv_ingest_api-2025.5.
|
|
149
|
+
nv_ingest_api/util/system/hardware_info.py,sha256=ORZeKpH9kSGU_vuPhyBwkIiMyCViKUX2CP__MCjrfbU,19463
|
|
150
|
+
nv_ingest_api-2025.5.16.dev20250516.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
151
|
+
nv_ingest_api-2025.5.16.dev20250516.dist-info/METADATA,sha256=hmBwy3Vk0ZoC6eSgpHhHS6dXuqdPLm_RQfEPY5sVAQQ,13889
|
|
152
|
+
nv_ingest_api-2025.5.16.dev20250516.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
|
|
153
|
+
nv_ingest_api-2025.5.16.dev20250516.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
|
|
154
|
+
nv_ingest_api-2025.5.16.dev20250516.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|