nv-ingest-api 2025.10.13.dev20251013__py3-none-any.whl → 2025.10.15.dev20251015__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. Click here for more details.
- nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +12 -0
- nv_ingest_api/util/service_clients/rest/rest_client.py +24 -3
- nv_ingest_api/util/string_processing/yaml.py +41 -4
- {nv_ingest_api-2025.10.13.dev20251013.dist-info → nv_ingest_api-2025.10.15.dev20251015.dist-info}/METADATA +1 -1
- {nv_ingest_api-2025.10.13.dev20251013.dist-info → nv_ingest_api-2025.10.15.dev20251015.dist-info}/RECORD +8 -8
- {nv_ingest_api-2025.10.13.dev20251013.dist-info → nv_ingest_api-2025.10.15.dev20251015.dist-info}/WHEEL +0 -0
- {nv_ingest_api-2025.10.13.dev20251013.dist-info → nv_ingest_api-2025.10.15.dev20251015.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-2025.10.13.dev20251013.dist-info → nv_ingest_api-2025.10.15.dev20251015.dist-info}/top_level.txt +0 -0
|
@@ -32,6 +32,17 @@ class TracingOptionsSchema(BaseModelNoExt):
|
|
|
32
32
|
total_pages: Optional[int] = None
|
|
33
33
|
|
|
34
34
|
|
|
35
|
+
# PDF Configuration Schema
|
|
36
|
+
class PdfConfigSchema(BaseModelNoExt):
|
|
37
|
+
"""PDF-specific configuration options for job submission.
|
|
38
|
+
|
|
39
|
+
Note: split_page_count accepts any positive integer but will be clamped
|
|
40
|
+
to [1, 128] range by the server at runtime.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
split_page_count: Annotated[int, Field(ge=1)] = 32
|
|
44
|
+
|
|
45
|
+
|
|
35
46
|
# Ingest Task Schemas
|
|
36
47
|
|
|
37
48
|
|
|
@@ -270,6 +281,7 @@ class IngestJobSchema(BaseModelNoExt):
|
|
|
270
281
|
job_id: Union[str, int]
|
|
271
282
|
tasks: List[IngestTaskSchema]
|
|
272
283
|
tracing_options: Optional[TracingOptionsSchema] = None
|
|
284
|
+
pdf_config: Optional[PdfConfigSchema] = None
|
|
273
285
|
|
|
274
286
|
|
|
275
287
|
# ------------------------------------------------------------------------------
|
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
|
-
import os
|
|
7
6
|
import re
|
|
8
7
|
import time
|
|
9
8
|
from typing import Any, Union, Tuple, Optional, Dict, Callable
|
|
@@ -104,6 +103,17 @@ class RestClient(MessageBrokerClientBase):
|
|
|
104
103
|
Default timeout in seconds for waiting for data after connection. Default is None.
|
|
105
104
|
http_allocator : Optional[Callable[[], Any]], optional
|
|
106
105
|
A callable that returns an HTTP client instance. If None, `requests.Session()` is used.
|
|
106
|
+
**kwargs : dict
|
|
107
|
+
Additional keyword arguments. Supported keys:
|
|
108
|
+
- api_version : str, optional
|
|
109
|
+
API version to use ('v1' or 'v2'). Defaults to 'v1' if not specified.
|
|
110
|
+
Invalid versions will log a warning and fall back to 'v1'.
|
|
111
|
+
- base_url : str, optional
|
|
112
|
+
Override the generated base URL.
|
|
113
|
+
- headers : dict, optional
|
|
114
|
+
Additional headers to include in requests.
|
|
115
|
+
- auth : optional
|
|
116
|
+
Authentication configuration for requests.
|
|
107
117
|
|
|
108
118
|
Returns
|
|
109
119
|
-------
|
|
@@ -138,8 +148,19 @@ class RestClient(MessageBrokerClientBase):
|
|
|
138
148
|
)
|
|
139
149
|
self._client = requests.Session()
|
|
140
150
|
|
|
141
|
-
#
|
|
142
|
-
|
|
151
|
+
# Validate and normalize API version to prevent misconfiguration
|
|
152
|
+
# Default to v1 for backwards compatibility if not explicitly provided
|
|
153
|
+
VALID_API_VERSIONS = {"v1", "v2"}
|
|
154
|
+
raw_api_version = kwargs.get("api_version", "v1")
|
|
155
|
+
api_version = str(raw_api_version).strip().lower()
|
|
156
|
+
|
|
157
|
+
if api_version not in VALID_API_VERSIONS:
|
|
158
|
+
logger.warning(
|
|
159
|
+
f"Invalid API version '{raw_api_version}' specified. "
|
|
160
|
+
f"Valid versions are: {VALID_API_VERSIONS}. Falling back to 'v1'."
|
|
161
|
+
)
|
|
162
|
+
api_version = "v1"
|
|
163
|
+
|
|
143
164
|
self._api_version = api_version
|
|
144
165
|
self._submit_endpoint: str = f"/{api_version}/submit_job"
|
|
145
166
|
self._fetch_endpoint: str = f"/{api_version}/fetch_job"
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import re
|
|
3
|
+
from typing import Optional
|
|
3
4
|
|
|
4
5
|
# This regex finds all forms of environment variables:
|
|
5
6
|
# $VAR, ${VAR}, $VAR|default, and ${VAR|default}
|
|
@@ -20,12 +21,46 @@ def _replacer(match: re.Match) -> str:
|
|
|
20
21
|
var_name = match.group("braced") or match.group("named")
|
|
21
22
|
default_val = match.group("braced_default") or match.group("named_default")
|
|
22
23
|
|
|
23
|
-
#
|
|
24
|
-
value = os.environ.get(var_name
|
|
24
|
+
# First try the primary env var
|
|
25
|
+
value = os.environ.get(var_name)
|
|
26
|
+
if value is not None:
|
|
27
|
+
return value
|
|
25
28
|
|
|
26
|
-
|
|
29
|
+
# If primary is missing, try the default.
|
|
30
|
+
resolved_default = _resolve_default_with_single_fallback(default_val)
|
|
31
|
+
|
|
32
|
+
if resolved_default is None:
|
|
27
33
|
return ""
|
|
28
|
-
|
|
34
|
+
|
|
35
|
+
return resolved_default
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _is_var_ref(token: str) -> Optional[str]:
|
|
39
|
+
"""If token is a $VAR or ${VAR} reference, return VAR name; else None."""
|
|
40
|
+
if not token:
|
|
41
|
+
return None
|
|
42
|
+
if token.startswith("${") and token.endswith("}"):
|
|
43
|
+
inner = token[2:-1]
|
|
44
|
+
return inner if re.fullmatch(r"\w+", inner) else None
|
|
45
|
+
if token.startswith("$"):
|
|
46
|
+
inner = token[1:]
|
|
47
|
+
return inner if re.fullmatch(r"\w+", inner) else None
|
|
48
|
+
return None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _resolve_default_with_single_fallback(default_val: Optional[str]) -> Optional[str]:
|
|
52
|
+
"""
|
|
53
|
+
Support a single-level fallback where the default itself can be another env var.
|
|
54
|
+
For example, in $A|$B or ${A|$B}, we try B if A missing.
|
|
55
|
+
"""
|
|
56
|
+
if default_val is None:
|
|
57
|
+
return None
|
|
58
|
+
|
|
59
|
+
var = _is_var_ref(default_val)
|
|
60
|
+
if var is not None:
|
|
61
|
+
return os.environ.get(var, None)
|
|
62
|
+
|
|
63
|
+
return default_val
|
|
29
64
|
|
|
30
65
|
|
|
31
66
|
def substitute_env_vars_in_yaml_content(raw_content: str) -> str:
|
|
@@ -35,6 +70,8 @@ def substitute_env_vars_in_yaml_content(raw_content: str) -> str:
|
|
|
35
70
|
This function finds all occurrences of environment variable placeholders
|
|
36
71
|
($VAR, ${VAR}, $VAR|default, ${VAR|default}) in the input string
|
|
37
72
|
and replaces them with their corresponding environment variable values.
|
|
73
|
+
Also supports a single fallback to another env var: $VAR|$OTHER, ${VAR|$OTHER}
|
|
74
|
+
Quoted defaults are preserved EXACTLY as written (e.g., 'a,b' keeps quotes).
|
|
38
75
|
|
|
39
76
|
Args:
|
|
40
77
|
raw_content: The raw string content of a YAML file.
|
|
@@ -84,7 +84,7 @@ nv_ingest_api/internal/schemas/message_brokers/request_schema.py,sha256=LZX_wXDx
|
|
|
84
84
|
nv_ingest_api/internal/schemas/message_brokers/response_schema.py,sha256=4b275HlzBSzpmuE2wdoeaGKPCdKki3wuWldtRIfrj8w,727
|
|
85
85
|
nv_ingest_api/internal/schemas/meta/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
86
86
|
nv_ingest_api/internal/schemas/meta/base_model_noext.py,sha256=8hXU1uuiqZ6t8EsoZ8vlC5EFf2zSZrKEX133FcfZMwI,316
|
|
87
|
-
nv_ingest_api/internal/schemas/meta/ingest_job_schema.py,sha256=
|
|
87
|
+
nv_ingest_api/internal/schemas/meta/ingest_job_schema.py,sha256=oPBoukRAnLW8BH6iKB0A_WIdewi_Go0NlxrakBwnswo,10782
|
|
88
88
|
nv_ingest_api/internal/schemas/meta/metadata_schema.py,sha256=FDD6yq-QxW8yDwn0Bq6bmWakX41ABMn3cytrvCbT-Po,11961
|
|
89
89
|
nv_ingest_api/internal/schemas/meta/udf.py,sha256=GgzqbZOlipQgMpDhbXLqbF8xrHenj_hMNqhR_P-1ynw,779
|
|
90
90
|
nv_ingest_api/internal/schemas/mutate/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
@@ -158,16 +158,16 @@ nv_ingest_api/util/service_clients/kafka/__init__.py,sha256=uLsBITo_XfgbwpzqXUm1
|
|
|
158
158
|
nv_ingest_api/util/service_clients/redis/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
|
|
159
159
|
nv_ingest_api/util/service_clients/redis/redis_client.py,sha256=3NLecvIvVN1v-sA7d7G-_f6qJVZyfJE2H8Iu5KG3Aew,37417
|
|
160
160
|
nv_ingest_api/util/service_clients/rest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
161
|
-
nv_ingest_api/util/service_clients/rest/rest_client.py,sha256=
|
|
161
|
+
nv_ingest_api/util/service_clients/rest/rest_client.py,sha256=7ymPxhuN9SP8nPSVepqqbvUxXPaTVunq2aC2bDbg98g,23684
|
|
162
162
|
nv_ingest_api/util/string_processing/__init__.py,sha256=mkwHthyS-IILcLcL1tJYeF6mpqX3pxEw5aUzDGjTSeU,1411
|
|
163
163
|
nv_ingest_api/util/string_processing/configuration.py,sha256=2HS08msccuPCT0fn_jfXRo9_M6hCZ59OxKLxG_47HRY,29888
|
|
164
|
-
nv_ingest_api/util/string_processing/yaml.py,sha256=
|
|
164
|
+
nv_ingest_api/util/string_processing/yaml.py,sha256=4Zdmc4474lUZn6kznqaNTlQJwsmRnnJQZ-DvAWLu-zo,2678
|
|
165
165
|
nv_ingest_api/util/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
166
166
|
nv_ingest_api/util/system/hardware_info.py,sha256=1UFM8XE6M3pgQcpbVsCsqDQ7Dj-zzptL-XRE-DEu9UA,27213
|
|
167
|
-
nv_ingest_api-2025.10.
|
|
167
|
+
nv_ingest_api-2025.10.15.dev20251015.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
168
168
|
udfs/__init__.py,sha256=pXFqPgXIUqHDfj7SAR1Q19tt8KwGv_iMvhHyziz4AYM,205
|
|
169
169
|
udfs/llm_summarizer_udf.py,sha256=t_ZFoz0e03uECYcRw4IabRj0GBlwAoJkJn13NL2wbsI,7217
|
|
170
|
-
nv_ingest_api-2025.10.
|
|
171
|
-
nv_ingest_api-2025.10.
|
|
172
|
-
nv_ingest_api-2025.10.
|
|
173
|
-
nv_ingest_api-2025.10.
|
|
170
|
+
nv_ingest_api-2025.10.15.dev20251015.dist-info/METADATA,sha256=C3OVAyEQD5iSPzluBsDanslOq8aNRojyt3QDA8yXwGQ,14086
|
|
171
|
+
nv_ingest_api-2025.10.15.dev20251015.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
172
|
+
nv_ingest_api-2025.10.15.dev20251015.dist-info/top_level.txt,sha256=I1lseG9FF0CH93SPx4kFblsxFuv190cfzaas_CLNIiw,19
|
|
173
|
+
nv_ingest_api-2025.10.15.dev20251015.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|