nv-ingest-api 2025.8.14.dev20250814__py3-none-any.whl → 2025.8.15.dev20250815__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. Click here for more details.
- nv_ingest_api/internal/enums/common.py +37 -0
- nv_ingest_api/internal/extract/image/image_extractor.py +5 -1
- nv_ingest_api/internal/meta/__init__.py +3 -0
- nv_ingest_api/internal/meta/udf.py +232 -0
- nv_ingest_api/internal/primitives/ingest_control_message.py +63 -22
- nv_ingest_api/internal/primitives/tracing/tagging.py +102 -15
- nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +40 -4
- nv_ingest_api/internal/schemas/meta/udf.py +23 -0
- nv_ingest_api/internal/transform/embed_text.py +5 -0
- nv_ingest_api/util/exception_handlers/decorators.py +104 -156
- nv_ingest_api/util/imports/callable_signatures.py +59 -1
- nv_ingest_api/util/imports/dynamic_resolvers.py +53 -5
- nv_ingest_api/util/introspection/__init__.py +3 -0
- nv_ingest_api/util/introspection/class_inspect.py +145 -0
- nv_ingest_api/util/introspection/function_inspect.py +65 -0
- nv_ingest_api/util/logging/configuration.py +71 -7
- nv_ingest_api/util/string_processing/configuration.py +682 -0
- nv_ingest_api/util/string_processing/yaml.py +45 -0
- nv_ingest_api/util/system/hardware_info.py +178 -13
- {nv_ingest_api-2025.8.14.dev20250814.dist-info → nv_ingest_api-2025.8.15.dev20250815.dist-info}/METADATA +1 -1
- {nv_ingest_api-2025.8.14.dev20250814.dist-info → nv_ingest_api-2025.8.15.dev20250815.dist-info}/RECORD +24 -16
- {nv_ingest_api-2025.8.14.dev20250814.dist-info → nv_ingest_api-2025.8.15.dev20250815.dist-info}/WHEEL +0 -0
- {nv_ingest_api-2025.8.14.dev20250814.dist-info → nv_ingest_api-2025.8.15.dev20250815.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-2025.8.14.dev20250814.dist-info → nv_ingest_api-2025.8.15.dev20250815.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
import inspect
|
|
6
|
+
from typing import Optional, Type, Union, Callable
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def find_pydantic_config_schema(
    actor_class: Type,
    base_class_to_find: Type,
    param_name: str = "config",
) -> Optional[Type[BaseModel]]:
    """
    Locate the Pydantic model that annotates an actor's ``__init__`` parameter.

    The actor may be handed in either as a class or as a non-class object
    (e.g. a proxy). In the latter case the MRO of ``actor_class.__class__``
    is searched for the first strict subclass of ``base_class_to_find`` and
    that class is inspected instead.

    Parameters
    ----------
    actor_class : Type
        The actor class, or a non-class object standing in for it.
    base_class_to_find : Type
        Base class used to pick the real actor class out of a non-class
        object's MRO. The base class itself is never selected, only its
        strict subclasses.
    param_name : str, optional
        Name of the ``__init__`` parameter whose annotation is examined,
        by default "config".

    Returns
    -------
    Optional[Type[BaseModel]]
        The annotation of ``param_name`` when it is a strict subclass of
        ``BaseModel``; otherwise None.
    """
    # Step 1: settle on the class whose MRO will be walked.
    if inspect.isclass(actor_class):
        resolved_cls = actor_class
    else:
        resolved_cls = next(
            (
                candidate
                for candidate in actor_class.__class__.__mro__
                if inspect.isclass(candidate)
                and issubclass(candidate, base_class_to_find)
                and candidate is not base_class_to_find
            ),
            None,
        )

    if not resolved_cls:
        return None

    # Step 2: walk the MRO looking for an __init__ that annotates `param_name`.
    for klass in resolved_cls.__mro__:
        init_annotations = getattr(klass.__init__, "__annotations__", {})
        if param_name not in init_annotations:
            continue

        try:
            parameter = inspect.signature(klass.__init__).parameters.get(param_name)
            if not parameter:
                continue
            annotation = parameter.annotation
            # Plain BaseModel is too generic to count as a schema.
            if annotation is not BaseModel and issubclass(annotation, BaseModel):
                return annotation
        except (ValueError, TypeError):
            # Signature not introspectable, or the annotation is not a class
            # (issubclass raised); move on to the next class in the MRO.
            continue

    return None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def find_pydantic_config_schema_for_callable(
    callable_fn: Callable,
    param_name: str = "stage_config",
) -> Optional[Type[BaseModel]]:
    """
    Locate the Pydantic model that annotates a parameter of a callable.

    Parameters
    ----------
    callable_fn : Callable
        The callable whose signature is inspected.
    param_name : str, optional
        Name of the parameter whose annotation is examined,
        by default "stage_config".

    Returns
    -------
    Optional[Type[BaseModel]]
        The annotation of ``param_name`` when it is a strict subclass of
        ``BaseModel``; None when the parameter is absent, is annotated with
        something else, or the signature cannot be inspected.
    """
    try:
        parameter = inspect.signature(callable_fn).parameters.get(param_name)
        if not parameter:
            return None

        annotation = parameter.annotation
        # Reject bare BaseModel and anything without an MRO (i.e. not
        # class-like) before handing the annotation to issubclass.
        if annotation is BaseModel or not hasattr(annotation, "__mro__"):
            return None
        if issubclass(annotation, BaseModel):
            return annotation
    except (ValueError, TypeError):
        # The signature could not be inspected; treat as "no schema".
        pass

    return None
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def find_pydantic_config_schema_unified(
    target: Union[Type, Callable],
    base_class_to_find: Optional[Type] = None,
    param_name: str = "config",
) -> Optional[Type[BaseModel]]:
    """
    Dispatch schema discovery to the class-based or callable-based finder.

    Parameters
    ----------
    target : Union[Type, Callable]
        The class, non-class object, or callable to inspect. Anything that is
        callable and is not a class takes the callable path.
    base_class_to_find : Optional[Type], optional
        Base class forwarded to ``find_pydantic_config_schema``. Only used on
        the class path; when it is None the class path returns None.
    param_name : str, optional
        Name of the parameter to inspect, by default "config". It is passed
        unchanged to whichever finder is selected, so callers inspecting a
        callable should pass "stage_config" explicitly if that is the
        parameter they expect.

    Returns
    -------
    Optional[Type[BaseModel]]
        The Pydantic BaseModel class if found, otherwise None.
    """
    target_is_class = inspect.isclass(target)

    # Callable path: functions and other non-class callables.
    if callable(target) and not target_is_class:
        return find_pydantic_config_schema_for_callable(target, param_name)

    # Neither a class nor an object exposing __class__: nothing to inspect.
    if not (target_is_class or hasattr(target, "__class__")):
        return None

    # Class path: the class-based finder needs a base class to resolve against.
    if base_class_to_find is None:
        return None

    return find_pydantic_config_schema(target, base_class_to_find, param_name)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Utilities for introspecting and analyzing UDF function specifications.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def infer_udf_function_name(udf_function: str) -> Optional[str]:
    """
    Attempts to infer the UDF function name from the provided function string.

    Supports three formats, checked in this order:
    1. Inline function: 'def my_func(control_message): ...' -> 'my_func'
    2. File path: '/path/to/file.py:function_name' -> 'function_name'
    3. Import path: 'my_module.my_function' -> 'my_function'

    Inline definitions are checked first because their bodies routinely
    contain ':', '.', and path separators, which would otherwise be mistaken
    for a file path or an import path.

    Parameters
    ----------
    udf_function : str
        The UDF function string.

    Returns
    -------
    Optional[str]
        The inferred UDF function name, or None if inference is not possible
        (for example, a file path given without a ':function_name' suffix).

    Examples
    --------
    >>> infer_udf_function_name("def my_custom_func(control_message): pass")
    'my_custom_func'

    >>> infer_udf_function_name("my_module.submodule.process_data")
    'process_data'

    >>> infer_udf_function_name("/path/to/script.py:custom_function")
    'custom_function'

    >>> infer_udf_function_name("/path/to/script.py") is None
    True
    """
    spec = udf_function.strip()

    # Format 1: inline function definition. Must be tested before the other
    # formats; the function body may contain any character.
    if spec.startswith("def "):
        match = re.match(r"def\s+(\w+)\s*\(", spec)
        return match.group(1) if match else None

    # Format 2: file path, optionally followed by ':function_name'.
    if "/" in spec or "\\" in spec:
        path_part, separator, candidate = spec.rpartition(":")
        candidate = candidate.strip()
        # Only accept a real identifier after the LAST colon. This rejects
        # bare paths and a Windows drive-letter colon (whose right-hand side
        # is the remainder of the path, not a function name).
        if separator and path_part.strip() and candidate.isidentifier():
            return candidate
        return None

    # Format 3: dotted import path; the last component is the function name.
    if "." in spec:
        candidate = spec.rpartition(".")[2].strip()
        return candidate or None

    return None
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
|
-
import
|
|
7
|
+
import logging.config
|
|
8
8
|
from enum import Enum
|
|
9
9
|
|
|
10
10
|
|
|
@@ -30,9 +30,73 @@ def configure_logging(level_name: str) -> None:
|
|
|
30
30
|
if not isinstance(numeric_level, int):
|
|
31
31
|
raise ValueError(f"Invalid log level: {level_name}")
|
|
32
32
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
33
|
+
# Scorched-earth reset: remove ALL existing handlers from root and named loggers
|
|
34
|
+
# to ensure there is exactly one handler after configuration.
|
|
35
|
+
root_logger = logging.getLogger()
|
|
36
|
+
for h in list(root_logger.handlers):
|
|
37
|
+
root_logger.removeHandler(h)
|
|
38
|
+
try:
|
|
39
|
+
h.close()
|
|
40
|
+
except Exception:
|
|
41
|
+
pass
|
|
42
|
+
|
|
43
|
+
# Clear handlers from all known loggers and make them propagate to root
|
|
44
|
+
for name, logger_obj in list(logging.Logger.manager.loggerDict.items()):
|
|
45
|
+
if isinstance(logger_obj, logging.Logger):
|
|
46
|
+
for h in list(logger_obj.handlers):
|
|
47
|
+
logger_obj.removeHandler(h)
|
|
48
|
+
try:
|
|
49
|
+
h.close()
|
|
50
|
+
except Exception:
|
|
51
|
+
pass
|
|
52
|
+
# Ensure messages bubble to root; levels will be controlled centrally
|
|
53
|
+
logger_obj.propagate = True
|
|
54
|
+
logger_obj.setLevel(logging.NOTSET)
|
|
55
|
+
|
|
56
|
+
# Use dictConfig to establish a single console handler on the root logger.
|
|
57
|
+
config_dict = {
|
|
58
|
+
"version": 1,
|
|
59
|
+
# We already cleared handlers above; keep loggers enabled so they propagate to root
|
|
60
|
+
"disable_existing_loggers": False,
|
|
61
|
+
"formatters": {
|
|
62
|
+
"standard": {
|
|
63
|
+
"format": "%(asctime)s - %(levelname)s - %(name)s - %(message)s",
|
|
64
|
+
}
|
|
65
|
+
},
|
|
66
|
+
"handlers": {
|
|
67
|
+
"console": {
|
|
68
|
+
"class": "logging.StreamHandler",
|
|
69
|
+
"level": numeric_level,
|
|
70
|
+
"formatter": "standard",
|
|
71
|
+
"stream": "ext://sys.stdout",
|
|
72
|
+
}
|
|
73
|
+
},
|
|
74
|
+
"root": {
|
|
75
|
+
"level": numeric_level,
|
|
76
|
+
"handlers": ["console"],
|
|
77
|
+
},
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
logging.config.dictConfig(config_dict)
|
|
81
|
+
|
|
82
|
+
# Enforce exactly one handler remains attached to root (keep first StreamHandler)
|
|
83
|
+
root_logger = logging.getLogger()
|
|
84
|
+
if len(root_logger.handlers) > 1:
|
|
85
|
+
keep = None
|
|
86
|
+
for h in list(root_logger.handlers):
|
|
87
|
+
if keep is None and isinstance(h, logging.StreamHandler):
|
|
88
|
+
keep = h
|
|
89
|
+
continue
|
|
90
|
+
root_logger.removeHandler(h)
|
|
91
|
+
try:
|
|
92
|
+
h.close()
|
|
93
|
+
except Exception:
|
|
94
|
+
pass
|
|
95
|
+
|
|
96
|
+
# Route warnings module through logging
|
|
97
|
+
try:
|
|
98
|
+
import logging as _logging
|
|
99
|
+
|
|
100
|
+
_logging.captureWarnings(True)
|
|
101
|
+
except Exception:
|
|
102
|
+
pass
|