nv-ingest-client 2025.10.26.dev20251026__tar.gz → 2025.10.27.dev20251027__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-client might be problematic. Click here for more details.
- {nv_ingest_client-2025.10.26.dev20251026/src/nv_ingest_client.egg-info → nv_ingest_client-2025.10.27.dev20251027}/PKG-INFO +1 -1
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/embed.py +16 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/udf.py +24 -27
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027/src/nv_ingest_client.egg-info}/PKG-INFO +1 -1
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/LICENSE +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/MANIFEST.in +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/README.md +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/pyproject.toml +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/setup.cfg +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/__init__.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/cli/__init__.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/cli/util/__init__.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/cli/util/click.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/cli/util/processing.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/cli/util/system.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/client/__init__.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/client/client.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/client/ingest_job_handler.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/client/interface.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/client/util/processing.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/nv_ingest_cli.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/__init__.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/jobs/__init__.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/jobs/job_spec.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/jobs/job_state.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/__init__.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/audio_extraction.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/caption.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/chart_extraction.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/dedup.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/extract.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/filter.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/infographic_extraction.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/split.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/store.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/table_extraction.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/task_base.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/task_factory.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/vdb_upload.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/__init__.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/dataset.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/document_analysis.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/file_processing/__init__.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/file_processing/extract.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/image_disk_utils.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/milvus.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/process_json_files.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/processing.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/system.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/transport.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/util.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/vdb/__init__.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/vdb/adt_vdb.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/vdb/milvus.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/vdb/opensearch.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/zipkin.py +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client.egg-info/SOURCES.txt +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client.egg-info/dependency_links.txt +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client.egg-info/entry_points.txt +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client.egg-info/requires.txt +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client.egg-info/top_level.txt +0 -0
- {nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/version.py +0 -0
|
@@ -36,6 +36,8 @@ class EmbedTask(Task):
|
|
|
36
36
|
image_elements_modality: Optional[str] = None,
|
|
37
37
|
structured_elements_modality: Optional[str] = None,
|
|
38
38
|
audio_elements_modality: Optional[str] = None,
|
|
39
|
+
custom_content_field: Optional[str] = None,
|
|
40
|
+
result_target_field: Optional[str] = None,
|
|
39
41
|
) -> None:
|
|
40
42
|
"""
|
|
41
43
|
Initialize the EmbedTask configuration.
|
|
@@ -76,6 +78,8 @@ class EmbedTask(Task):
|
|
|
76
78
|
image_elements_modality=image_elements_modality,
|
|
77
79
|
structured_elements_modality=structured_elements_modality,
|
|
78
80
|
audio_elements_modality=audio_elements_modality,
|
|
81
|
+
custom_content_field=custom_content_field,
|
|
82
|
+
result_target_field=result_target_field,
|
|
79
83
|
)
|
|
80
84
|
|
|
81
85
|
self._endpoint_url = validated_data.endpoint_url
|
|
@@ -86,6 +90,8 @@ class EmbedTask(Task):
|
|
|
86
90
|
self._image_elements_modality = validated_data.image_elements_modality
|
|
87
91
|
self._structured_elements_modality = validated_data.structured_elements_modality
|
|
88
92
|
self._audio_elements_modality = validated_data.audio_elements_modality
|
|
93
|
+
self._custom_content_field = validated_data.custom_content_field
|
|
94
|
+
self._result_target_field = validated_data.result_target_field
|
|
89
95
|
|
|
90
96
|
def __str__(self) -> str:
|
|
91
97
|
"""
|
|
@@ -114,6 +120,10 @@ class EmbedTask(Task):
|
|
|
114
120
|
info += f" structured_elements_modality: {self._structured_elements_modality}\n"
|
|
115
121
|
if self._audio_elements_modality:
|
|
116
122
|
info += f" audio_elements_modality: {self._audio_elements_modality}\n"
|
|
123
|
+
if self._custom_content_field:
|
|
124
|
+
info += f" custom_content_field: {self._custom_content_field}\n"
|
|
125
|
+
if self._result_target_field:
|
|
126
|
+
info += f" result_target_field: {self.result_target_field}\n"
|
|
117
127
|
return info
|
|
118
128
|
|
|
119
129
|
def to_dict(self) -> Dict[str, Any]:
|
|
@@ -149,4 +159,10 @@ class EmbedTask(Task):
|
|
|
149
159
|
if self._audio_elements_modality:
|
|
150
160
|
task_properties["audio_elements_modality"] = self._audio_elements_modality
|
|
151
161
|
|
|
162
|
+
if self._custom_content_field:
|
|
163
|
+
task_properties["custom_content_field"] = self._custom_content_field
|
|
164
|
+
|
|
165
|
+
if self._result_target_field:
|
|
166
|
+
task_properties["result_target_field"] = self.result_target_field
|
|
167
|
+
|
|
152
168
|
return {"type": "embed", "task_properties": task_properties}
|
|
@@ -11,6 +11,7 @@ import logging
|
|
|
11
11
|
import importlib
|
|
12
12
|
import inspect
|
|
13
13
|
import ast
|
|
14
|
+
import re
|
|
14
15
|
from typing import Dict, Optional, Union
|
|
15
16
|
|
|
16
17
|
from nv_ingest_api.internal.enums.common import PipelinePhase
|
|
@@ -122,54 +123,50 @@ def _resolve_udf_function(udf_function_spec: str) -> str:
|
|
|
122
123
|
3. File path: '/path/to/file.py:my_function'
|
|
123
124
|
4. Legacy import path: 'my_module.my_function' (function name only, no imports)
|
|
124
125
|
"""
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
126
|
+
# Default to treating as inline unless it clearly matches a
|
|
127
|
+
# module/file specification. This avoids misclassifying inline code that
|
|
128
|
+
# contains colons, imports, or annotations before the def line.
|
|
128
129
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
130
|
+
spec = udf_function_spec.strip()
|
|
131
|
+
|
|
132
|
+
# 1) File path with function: /path/to/file.py:function_name
|
|
133
|
+
if ".py:" in spec:
|
|
134
|
+
file_path, function_name = spec.split(":", 1)
|
|
132
135
|
return _extract_function_with_context(file_path, function_name)
|
|
133
136
|
|
|
134
|
-
|
|
135
|
-
|
|
137
|
+
# 2) File path without function name is an explicit error
|
|
138
|
+
if spec.endswith(".py"):
|
|
136
139
|
raise ValueError(
|
|
137
|
-
f"File path '{udf_function_spec}' is missing function name. "
|
|
138
|
-
f"Use format 'file.py:function_name' to specify which function to use."
|
|
140
|
+
f"File path '{udf_function_spec}' is missing function name. Use format 'file.py:function_name'."
|
|
139
141
|
)
|
|
140
142
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
143
|
+
# 3) Module path with colon: my.module:function
|
|
144
|
+
# Be strict: only letters, numbers, underscore, and dots on the left; valid identifier on the right;
|
|
145
|
+
# no whitespace/newlines.
|
|
146
|
+
module_colon_pattern = re.compile(r"^[A-Za-z_][\w\.]*:[A-Za-z_][\w]*$")
|
|
147
|
+
if module_colon_pattern.match(spec):
|
|
148
|
+
module_path, function_name = spec.split(":", 1)
|
|
146
149
|
try:
|
|
147
|
-
# Import the module to get its file path
|
|
148
150
|
module = importlib.import_module(module_path)
|
|
149
151
|
module_file = inspect.getfile(module)
|
|
150
|
-
|
|
151
|
-
# Extract the function with full module context
|
|
152
152
|
return _extract_function_with_context(module_file, function_name)
|
|
153
|
-
|
|
154
153
|
except ImportError as e:
|
|
155
154
|
raise ValueError(f"Failed to import module '{module_path}': {e}")
|
|
156
155
|
except Exception as e:
|
|
157
156
|
raise ValueError(f"Failed to resolve module path '{module_path}': {e}")
|
|
158
157
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
func = _load_function_from_import_path(
|
|
163
|
-
|
|
164
|
-
# Get the source code of the function only
|
|
158
|
+
# 4) Legacy import path: my.module.function (no colon)
|
|
159
|
+
legacy_import_pattern = re.compile(r"^[A-Za-z_][\w\.]*\.[A-Za-z_][\w]*$")
|
|
160
|
+
if legacy_import_pattern.match(spec):
|
|
161
|
+
func = _load_function_from_import_path(spec)
|
|
165
162
|
try:
|
|
166
163
|
source = inspect.getsource(func)
|
|
167
164
|
return source
|
|
168
165
|
except (OSError, TypeError) as e:
|
|
169
166
|
raise ValueError(f"Could not get source code for function from '{udf_function_spec}': {e}")
|
|
170
167
|
|
|
171
|
-
|
|
172
|
-
|
|
168
|
+
# 5) Default: treat as inline UDF source (entire string)
|
|
169
|
+
return udf_function_spec
|
|
173
170
|
|
|
174
171
|
|
|
175
172
|
class UDFTask(Task):
|
|
File without changes
|
{nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/MANIFEST.in
RENAMED
|
File without changes
|
{nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/README.md
RENAMED
|
File without changes
|
{nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/pyproject.toml
RENAMED
|
File without changes
|
{nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/setup.cfg
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest_client-2025.10.26.dev20251026 → nv_ingest_client-2025.10.27.dev20251027}/src/version.py
RENAMED
|
File without changes
|