nv-ingest-client 2025.10.25.dev20251025__tar.gz → 2025.10.27.dev20251027__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-client might be problematic. Click here for more details.

Files changed (62)
  1. {nv_ingest_client-2025.10.25.dev20251025/src/nv_ingest_client.egg-info → nv_ingest_client-2025.10.27.dev20251027}/PKG-INFO +1 -1
  2. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/embed.py +16 -0
  3. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/udf.py +24 -27
  4. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027/src/nv_ingest_client.egg-info}/PKG-INFO +1 -1
  5. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/LICENSE +0 -0
  6. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/MANIFEST.in +0 -0
  7. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/README.md +0 -0
  8. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/pyproject.toml +0 -0
  9. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/setup.cfg +0 -0
  10. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/__init__.py +0 -0
  11. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/cli/__init__.py +0 -0
  12. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/cli/util/__init__.py +0 -0
  13. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/cli/util/click.py +0 -0
  14. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/cli/util/processing.py +0 -0
  15. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/cli/util/system.py +0 -0
  16. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/client/__init__.py +0 -0
  17. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/client/client.py +0 -0
  18. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/client/ingest_job_handler.py +0 -0
  19. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/client/interface.py +0 -0
  20. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/client/util/processing.py +0 -0
  21. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/nv_ingest_cli.py +0 -0
  22. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/__init__.py +0 -0
  23. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/jobs/__init__.py +0 -0
  24. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/jobs/job_spec.py +0 -0
  25. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/jobs/job_state.py +0 -0
  26. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/__init__.py +0 -0
  27. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/audio_extraction.py +0 -0
  28. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/caption.py +0 -0
  29. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/chart_extraction.py +0 -0
  30. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/dedup.py +0 -0
  31. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/extract.py +0 -0
  32. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/filter.py +0 -0
  33. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/infographic_extraction.py +0 -0
  34. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/split.py +0 -0
  35. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/store.py +0 -0
  36. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/table_extraction.py +0 -0
  37. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/task_base.py +0 -0
  38. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/task_factory.py +0 -0
  39. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/primitives/tasks/vdb_upload.py +0 -0
  40. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/__init__.py +0 -0
  41. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/dataset.py +0 -0
  42. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/document_analysis.py +0 -0
  43. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/file_processing/__init__.py +0 -0
  44. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/file_processing/extract.py +0 -0
  45. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/image_disk_utils.py +0 -0
  46. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/milvus.py +0 -0
  47. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/process_json_files.py +0 -0
  48. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/processing.py +0 -0
  49. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/system.py +0 -0
  50. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/transport.py +0 -0
  51. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/util.py +0 -0
  52. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/vdb/__init__.py +0 -0
  53. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/vdb/adt_vdb.py +0 -0
  54. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/vdb/milvus.py +0 -0
  55. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/vdb/opensearch.py +0 -0
  56. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client/util/zipkin.py +0 -0
  57. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client.egg-info/SOURCES.txt +0 -0
  58. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client.egg-info/dependency_links.txt +0 -0
  59. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client.egg-info/entry_points.txt +0 -0
  60. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client.egg-info/requires.txt +0 -0
  61. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/nv_ingest_client.egg-info/top_level.txt +0 -0
  62. {nv_ingest_client-2025.10.25.dev20251025 → nv_ingest_client-2025.10.27.dev20251027}/src/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.10.25.dev20251025
3
+ Version: 2025.10.27.dev20251027
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -36,6 +36,8 @@ class EmbedTask(Task):
36
36
  image_elements_modality: Optional[str] = None,
37
37
  structured_elements_modality: Optional[str] = None,
38
38
  audio_elements_modality: Optional[str] = None,
39
+ custom_content_field: Optional[str] = None,
40
+ result_target_field: Optional[str] = None,
39
41
  ) -> None:
40
42
  """
41
43
  Initialize the EmbedTask configuration.
@@ -76,6 +78,8 @@ class EmbedTask(Task):
76
78
  image_elements_modality=image_elements_modality,
77
79
  structured_elements_modality=structured_elements_modality,
78
80
  audio_elements_modality=audio_elements_modality,
81
+ custom_content_field=custom_content_field,
82
+ result_target_field=result_target_field,
79
83
  )
80
84
 
81
85
  self._endpoint_url = validated_data.endpoint_url
@@ -86,6 +90,8 @@ class EmbedTask(Task):
86
90
  self._image_elements_modality = validated_data.image_elements_modality
87
91
  self._structured_elements_modality = validated_data.structured_elements_modality
88
92
  self._audio_elements_modality = validated_data.audio_elements_modality
93
+ self._custom_content_field = validated_data.custom_content_field
94
+ self._result_target_field = validated_data.result_target_field
89
95
 
90
96
  def __str__(self) -> str:
91
97
  """
@@ -114,6 +120,10 @@ class EmbedTask(Task):
114
120
  info += f" structured_elements_modality: {self._structured_elements_modality}\n"
115
121
  if self._audio_elements_modality:
116
122
  info += f" audio_elements_modality: {self._audio_elements_modality}\n"
123
+ if self._custom_content_field:
124
+ info += f" custom_content_field: {self._custom_content_field}\n"
125
+ if self._result_target_field:
126
+ info += f" result_target_field: {self.result_target_field}\n"
117
127
  return info
118
128
 
119
129
  def to_dict(self) -> Dict[str, Any]:
@@ -149,4 +159,10 @@ class EmbedTask(Task):
149
159
  if self._audio_elements_modality:
150
160
  task_properties["audio_elements_modality"] = self._audio_elements_modality
151
161
 
162
+ if self._custom_content_field:
163
+ task_properties["custom_content_field"] = self._custom_content_field
164
+
165
+ if self._result_target_field:
166
+ task_properties["result_target_field"] = self.result_target_field
167
+
152
168
  return {"type": "embed", "task_properties": task_properties}
@@ -11,6 +11,7 @@ import logging
11
11
  import importlib
12
12
  import inspect
13
13
  import ast
14
+ import re
14
15
  from typing import Dict, Optional, Union
15
16
 
16
17
  from nv_ingest_api.internal.enums.common import PipelinePhase
@@ -122,54 +123,50 @@ def _resolve_udf_function(udf_function_spec: str) -> str:
122
123
  3. File path: '/path/to/file.py:my_function'
123
124
  4. Legacy import path: 'my_module.my_function' (function name only, no imports)
124
125
  """
125
- if udf_function_spec.strip().startswith("def "):
126
- # Already an inline function string
127
- return udf_function_spec
126
+ # Default to treating as inline unless it clearly matches a
127
+ # module/file specification. This avoids misclassifying inline code that
128
+ # contains colons, imports, or annotations before the def line.
128
129
 
129
- elif ".py:" in udf_function_spec:
130
- # File path format: /path/to/file.py:function_name
131
- file_path, function_name = udf_function_spec.split(":", 1)
130
+ spec = udf_function_spec.strip()
131
+
132
+ # 1) File path with function: /path/to/file.py:function_name
133
+ if ".py:" in spec:
134
+ file_path, function_name = spec.split(":", 1)
132
135
  return _extract_function_with_context(file_path, function_name)
133
136
 
134
- elif udf_function_spec.endswith(".py"):
135
- # File path format without function name - this is an error
137
+ # 2) File path without function name is an explicit error
138
+ if spec.endswith(".py"):
136
139
  raise ValueError(
137
- f"File path '{udf_function_spec}' is missing function name. "
138
- f"Use format 'file.py:function_name' to specify which function to use."
140
+ f"File path '{udf_function_spec}' is missing function name. Use format 'file.py:function_name'."
139
141
  )
140
142
 
141
- elif ":" in udf_function_spec and ".py:" not in udf_function_spec:
142
- # Module path format with colon: my_module.submodule:function_name
143
- # This preserves imports and module context
144
- module_path, function_name = udf_function_spec.split(":", 1)
145
-
143
+ # 3) Module path with colon: my.module:function
144
+ # Be strict: only letters, numbers, underscore, and dots on the left; valid identifier on the right;
145
+ # no whitespace/newlines.
146
+ module_colon_pattern = re.compile(r"^[A-Za-z_][\w\.]*:[A-Za-z_][\w]*$")
147
+ if module_colon_pattern.match(spec):
148
+ module_path, function_name = spec.split(":", 1)
146
149
  try:
147
- # Import the module to get its file path
148
150
  module = importlib.import_module(module_path)
149
151
  module_file = inspect.getfile(module)
150
-
151
- # Extract the function with full module context
152
152
  return _extract_function_with_context(module_file, function_name)
153
-
154
153
  except ImportError as e:
155
154
  raise ValueError(f"Failed to import module '{module_path}': {e}")
156
155
  except Exception as e:
157
156
  raise ValueError(f"Failed to resolve module path '{module_path}': {e}")
158
157
 
159
- elif "." in udf_function_spec:
160
- # Legacy import path format: module.submodule.function
161
- # This only extracts the function source without imports (legacy behavior)
162
- func = _load_function_from_import_path(udf_function_spec)
163
-
164
- # Get the source code of the function only
158
+ # 4) Legacy import path: my.module.function (no colon)
159
+ legacy_import_pattern = re.compile(r"^[A-Za-z_][\w\.]*\.[A-Za-z_][\w]*$")
160
+ if legacy_import_pattern.match(spec):
161
+ func = _load_function_from_import_path(spec)
165
162
  try:
166
163
  source = inspect.getsource(func)
167
164
  return source
168
165
  except (OSError, TypeError) as e:
169
166
  raise ValueError(f"Could not get source code for function from '{udf_function_spec}': {e}")
170
167
 
171
- else:
172
- raise ValueError(f"Invalid UDF function specification: {udf_function_spec}")
168
+ # 5) Default: treat as inline UDF source (entire string)
169
+ return udf_function_spec
173
170
 
174
171
 
175
172
  class UDFTask(Task):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.10.25.dev20251025
3
+ Version: 2025.10.27.dev20251027
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License