nv-ingest-api 2025.8.14.dev20250814__py3-none-any.whl → 2025.8.16.dev20250816__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

Files changed (24) hide show
  1. nv_ingest_api/internal/enums/common.py +37 -0
  2. nv_ingest_api/internal/extract/image/image_extractor.py +5 -1
  3. nv_ingest_api/internal/meta/__init__.py +3 -0
  4. nv_ingest_api/internal/meta/udf.py +232 -0
  5. nv_ingest_api/internal/primitives/ingest_control_message.py +63 -22
  6. nv_ingest_api/internal/primitives/tracing/tagging.py +102 -15
  7. nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +40 -4
  8. nv_ingest_api/internal/schemas/meta/udf.py +23 -0
  9. nv_ingest_api/internal/transform/embed_text.py +5 -0
  10. nv_ingest_api/util/exception_handlers/decorators.py +104 -156
  11. nv_ingest_api/util/imports/callable_signatures.py +59 -1
  12. nv_ingest_api/util/imports/dynamic_resolvers.py +53 -5
  13. nv_ingest_api/util/introspection/__init__.py +3 -0
  14. nv_ingest_api/util/introspection/class_inspect.py +145 -0
  15. nv_ingest_api/util/introspection/function_inspect.py +65 -0
  16. nv_ingest_api/util/logging/configuration.py +71 -7
  17. nv_ingest_api/util/string_processing/configuration.py +682 -0
  18. nv_ingest_api/util/string_processing/yaml.py +45 -0
  19. nv_ingest_api/util/system/hardware_info.py +178 -13
  20. {nv_ingest_api-2025.8.14.dev20250814.dist-info → nv_ingest_api-2025.8.16.dev20250816.dist-info}/METADATA +1 -1
  21. {nv_ingest_api-2025.8.14.dev20250814.dist-info → nv_ingest_api-2025.8.16.dev20250816.dist-info}/RECORD +24 -16
  22. {nv_ingest_api-2025.8.14.dev20250814.dist-info → nv_ingest_api-2025.8.16.dev20250816.dist-info}/WHEEL +0 -0
  23. {nv_ingest_api-2025.8.14.dev20250814.dist-info → nv_ingest_api-2025.8.16.dev20250816.dist-info}/licenses/LICENSE +0 -0
  24. {nv_ingest_api-2025.8.14.dev20250814.dist-info → nv_ingest_api-2025.8.16.dev20250816.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,145 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ import inspect
6
+ from typing import Optional, Type, Union, Callable
7
+
8
+ from pydantic import BaseModel
9
+
10
+
11
def find_pydantic_config_schema(
    actor_class: Type,
    base_class_to_find: Type,
    param_name: str = "config",
) -> Optional[Type[BaseModel]]:
    """
    Locate the Pydantic model annotated on an ``__init__`` parameter of an actor.

    The lookup happens in two phases. First the class to inspect is resolved:
    a class is used as-is, while a non-class object (e.g. a proxy) is resolved
    by scanning the MRO of its type for the first strict subclass of
    ``base_class_to_find``. Then the MRO of the resolved class is walked and
    the first ``__init__`` whose ``param_name`` annotation is a strict subclass
    of ``BaseModel`` wins.

    Parameters
    ----------
    actor_class : Type
        The actor class, or a non-class object whose type hierarchy contains
        the actor class.
    base_class_to_find : Type
        Base class used to pick the real actor class out of a non-class
        object's type MRO. Not consulted when ``actor_class`` is itself a class.
    param_name : str, optional
        Name of the ``__init__`` parameter whose annotation is examined,
        by default "config".

    Returns
    -------
    Optional[Type[BaseModel]]
        The annotated ``BaseModel`` subclass, or None when nothing suitable
        is found.
    """
    # Phase 1: settle on the concrete class whose MRO will be searched.
    if inspect.isclass(actor_class):
        resolved = actor_class
    else:
        resolved = next(
            (
                candidate
                for candidate in actor_class.__class__.__mro__
                if inspect.isclass(candidate)
                and issubclass(candidate, base_class_to_find)
                and candidate is not base_class_to_find
            ),
            None,
        )

    if not resolved:
        return None

    # Phase 2: search each class in the MRO for an annotated __init__ parameter.
    for klass in resolved.__mro__:
        initializer = klass.__init__
        if param_name not in getattr(initializer, "__annotations__", {}):
            continue

        try:
            parameter = inspect.signature(initializer).parameters.get(param_name)
            if not parameter:
                continue
            annotation = parameter.annotation
            if annotation is not BaseModel and issubclass(annotation, BaseModel):
                return annotation
        except (ValueError, TypeError):
            # Signature could not be inspected, or the annotation is not a class;
            # move on to the next class in the MRO.
            continue

    return None
68
+
69
+
70
def find_pydantic_config_schema_for_callable(
    callable_fn: Callable,
    param_name: str = "stage_config",
) -> Optional[Type[BaseModel]]:
    """
    Locate the Pydantic model annotated on a named parameter of a callable.

    Parameters
    ----------
    callable_fn : Callable
        The callable whose signature is inspected.
    param_name : str, optional
        Name of the parameter whose annotation is examined,
        by default "stage_config".

    Returns
    -------
    Optional[Type[BaseModel]]
        The parameter's annotation when it is a strict subclass of
        ``BaseModel``; None when the parameter is absent, its annotation is
        not such a class, or the signature cannot be inspected.
    """
    try:
        parameter = inspect.signature(callable_fn).parameters.get(param_name)
        if not parameter:
            return None

        annotation = parameter.annotation
        # BaseModel itself does not count, and only class-like annotations
        # (those exposing an MRO) are passed to issubclass.
        if annotation is BaseModel or not hasattr(annotation, "__mro__"):
            return None

        if issubclass(annotation, BaseModel):
            return annotation
    except (ValueError, TypeError):
        # The callable's signature is not inspectable.
        return None

    return None
108
+
109
+
110
def find_pydantic_config_schema_unified(
    target: Union[Type, Callable],
    base_class_to_find: Optional[Type] = None,
    param_name: str = "config",
) -> Optional[Type[BaseModel]]:
    """
    Dispatch schema discovery to the callable-based or class-based finder.

    Parameters
    ----------
    target : Union[Type, Callable]
        The class, proxy object, or callable to inspect.
    base_class_to_find : Optional[Type], optional
        Base class forwarded to the class-based finder. Ignored for non-class
        callables; when it is None, class/proxy targets yield None.
    param_name : str, optional
        Name of the parameter to inspect, by default "config". The same value
        is forwarded to whichever finder is selected, so callers inspecting a
        callable should pass "stage_config" explicitly if that is the
        parameter name in use.

    Returns
    -------
    Optional[Type[BaseModel]]
        The Pydantic BaseModel class if found, otherwise None.
    """
    # Anything callable that is not itself a class goes to the callable finder.
    if callable(target) and not inspect.isclass(target):
        return find_pydantic_config_schema_for_callable(target, param_name)

    is_class_or_proxy = inspect.isclass(target) or hasattr(target, "__class__")
    # The class-based finder cannot run without a base class to resolve against.
    if not is_class_or_proxy or base_class_to_find is None:
        return None

    return find_pydantic_config_schema(target, base_class_to_find, param_name)
@@ -0,0 +1,65 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ """
6
+ Utilities for introspecting and analyzing UDF function specifications.
7
+ """
8
+
9
+ import re
10
+ from typing import Optional
11
+
12
+
13
def infer_udf_function_name(udf_function: str) -> Optional[str]:
    """
    Attempts to infer the UDF function name from the provided function string.

    Supports three formats, checked in this order:
    1. Inline function: 'def my_func(control_message): ...' -> 'my_func'
    2. File path: '/path/to/file.py:function_name' -> 'function_name'
    3. Import path: 'my_module.my_function' -> 'my_function'

    Parameters
    ----------
    udf_function : str
        The UDF function string. Leading and trailing whitespace is ignored.

    Returns
    -------
    Optional[str]
        The inferred UDF function name, or None if inference is not possible
        (for example a file path given without a ':function_name' suffix, or
        a trailing component that is not a valid identifier).

    Examples
    --------
    >>> infer_udf_function_name("def my_custom_func(control_message): pass")
    'my_custom_func'

    >>> infer_udf_function_name("my_module.submodule.process_data")
    'process_data'

    >>> infer_udf_function_name("/path/to/script.py:custom_function")
    'custom_function'

    >>> infer_udf_function_name("/path/to/script.py") is None
    True

    >>> infer_udf_function_name("def ratio(cm): return cm.a / cm.b")
    'ratio'
    """
    spec = udf_function.strip()

    # Inline definitions must be recognised first: their bodies routinely
    # contain ':', '/', '\\' and '.', which would otherwise be mistaken for a
    # file path or an import path.
    if spec.startswith("def "):
        match = re.match(r"def\s+(\w+)\s*\(", spec)
        return match.group(1) if match else None

    # File specification: '<path>:<function_name>'. Split on the LAST colon so
    # that a Windows drive prefix ('C:\\...') stays part of the path, and only
    # accept the tail when it is a plausible function name. A bare path with
    # no function suffix yields None rather than a path fragment.
    if ":" in spec or "/" in spec or "\\" in spec:
        _, colon, candidate = spec.rpartition(":")
        candidate = candidate.strip()
        if colon and candidate.isidentifier():
            return candidate
        return None

    # Import path: the function is the component after the last dot.
    if "." in spec:
        candidate = spec.rpartition(".")[2].strip()
        return candidate if candidate.isidentifier() else None

    return None
@@ -4,7 +4,7 @@
4
4
 
5
5
 
6
6
  import logging
7
- import sys
7
+ import logging.config
8
8
  from enum import Enum
9
9
 
10
10
 
@@ -30,9 +30,73 @@ def configure_logging(level_name: str) -> None:
30
30
  if not isinstance(numeric_level, int):
31
31
  raise ValueError(f"Invalid log level: {level_name}")
32
32
 
33
- logging.basicConfig(
34
- level=numeric_level,
35
- format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
36
- stream=sys.stdout,
37
- force=True, # <- reconfigures even if basicConfig was called earlier (Python 3.8+)
38
- )
33
+ # Scorched-earth reset: remove ALL existing handlers from root and named loggers
34
+ # to ensure there is exactly one handler after configuration.
35
+ root_logger = logging.getLogger()
36
+ for h in list(root_logger.handlers):
37
+ root_logger.removeHandler(h)
38
+ try:
39
+ h.close()
40
+ except Exception:
41
+ pass
42
+
43
+ # Clear handlers from all known loggers and make them propagate to root
44
+ for name, logger_obj in list(logging.Logger.manager.loggerDict.items()):
45
+ if isinstance(logger_obj, logging.Logger):
46
+ for h in list(logger_obj.handlers):
47
+ logger_obj.removeHandler(h)
48
+ try:
49
+ h.close()
50
+ except Exception:
51
+ pass
52
+ # Ensure messages bubble to root; levels will be controlled centrally
53
+ logger_obj.propagate = True
54
+ logger_obj.setLevel(logging.NOTSET)
55
+
56
+ # Use dictConfig to establish a single console handler on the root logger.
57
+ config_dict = {
58
+ "version": 1,
59
+ # We already cleared handlers above; keep loggers enabled so they propagate to root
60
+ "disable_existing_loggers": False,
61
+ "formatters": {
62
+ "standard": {
63
+ "format": "%(asctime)s - %(levelname)s - %(name)s - %(message)s",
64
+ }
65
+ },
66
+ "handlers": {
67
+ "console": {
68
+ "class": "logging.StreamHandler",
69
+ "level": numeric_level,
70
+ "formatter": "standard",
71
+ "stream": "ext://sys.stdout",
72
+ }
73
+ },
74
+ "root": {
75
+ "level": numeric_level,
76
+ "handlers": ["console"],
77
+ },
78
+ }
79
+
80
+ logging.config.dictConfig(config_dict)
81
+
82
+ # Enforce exactly one handler remains attached to root (keep first StreamHandler)
83
+ root_logger = logging.getLogger()
84
+ if len(root_logger.handlers) > 1:
85
+ keep = None
86
+ for h in list(root_logger.handlers):
87
+ if keep is None and isinstance(h, logging.StreamHandler):
88
+ keep = h
89
+ continue
90
+ root_logger.removeHandler(h)
91
+ try:
92
+ h.close()
93
+ except Exception:
94
+ pass
95
+
96
+ # Route warnings module through logging
97
+ try:
98
+ import logging as _logging
99
+
100
+ _logging.captureWarnings(True)
101
+ except Exception:
102
+ pass