nv-ingest-api 2025.2.17.dev20250217233316__tar.gz → 2025.2.18.dev20250218233328__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. Click here for more details.
- {nv_ingest_api-2025.2.17.dev20250217233316/src/nv_ingest_api.egg-info → nv_ingest_api-2025.2.18.dev20250218233328}/PKG-INFO +1 -1
- nv_ingest_api-2025.2.18.dev20250218233328/src/nv_ingest_api/__init__.py +0 -0
- nv_ingest_api-2025.2.18.dev20250218233328/src/nv_ingest_api/primitives/__init__.py +0 -0
- nv_ingest_api-2025.2.18.dev20250218233328/src/nv_ingest_api/primitives/control_message_task.py +10 -0
- nv_ingest_api-2025.2.18.dev20250218233328/src/nv_ingest_api/primitives/ingest_control_message.py +216 -0
- {nv_ingest_api-2025.2.17.dev20250217233316 → nv_ingest_api-2025.2.18.dev20250218233328/src/nv_ingest_api.egg-info}/PKG-INFO +1 -1
- {nv_ingest_api-2025.2.17.dev20250217233316 → nv_ingest_api-2025.2.18.dev20250218233328}/src/nv_ingest_api.egg-info/SOURCES.txt +5 -1
- nv_ingest_api-2025.2.18.dev20250218233328/src/nv_ingest_api.egg-info/top_level.txt +1 -0
- nv_ingest_api-2025.2.17.dev20250217233316/src/nv_ingest_api.egg-info/top_level.txt +0 -1
- {nv_ingest_api-2025.2.17.dev20250217233316 → nv_ingest_api-2025.2.18.dev20250218233328}/LICENSE +0 -0
- {nv_ingest_api-2025.2.17.dev20250217233316 → nv_ingest_api-2025.2.18.dev20250218233328}/MANIFEST.in +0 -0
- {nv_ingest_api-2025.2.17.dev20250217233316 → nv_ingest_api-2025.2.18.dev20250218233328}/README.md +0 -0
- {nv_ingest_api-2025.2.17.dev20250217233316 → nv_ingest_api-2025.2.18.dev20250218233328}/pyproject.toml +0 -0
- {nv_ingest_api-2025.2.17.dev20250217233316 → nv_ingest_api-2025.2.18.dev20250218233328}/setup.cfg +0 -0
- {nv_ingest_api-2025.2.17.dev20250217233316 → nv_ingest_api-2025.2.18.dev20250218233328}/src/__init__.py +0 -0
- {nv_ingest_api-2025.2.17.dev20250217233316 → nv_ingest_api-2025.2.18.dev20250218233328}/src/nv_ingest_api.egg-info/dependency_links.txt +0 -0
- {nv_ingest_api-2025.2.17.dev20250217233316 → nv_ingest_api-2025.2.18.dev20250218233328}/src/nv_ingest_api.egg-info/requires.txt +0 -0
- {nv_ingest_api-2025.2.17.dev20250217233316 → nv_ingest_api-2025.2.18.dev20250218233328}/src/version.py +0 -0
|
File without changes
|
|
File without changes
|
nv_ingest_api-2025.2.18.dev20250218233328/src/nv_ingest_api/primitives/ingest_control_message.py
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import re
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import pandas as pd
|
|
7
|
+
from typing import Any, Dict, Generator, Union
|
|
8
|
+
|
|
9
|
+
from nv_ingest_api.primitives.control_message_task import ControlMessageTask
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class IngestControlMessage:
    """
    A control message class for ingesting tasks and managing associated metadata,
    timestamps, configuration, and payload.

    Tasks are stored in a dictionary keyed by each task's ``id``. Metadata, timestamps
    and configuration are plain dictionaries, and the payload is a pandas DataFrame.
    """

    def __init__(self):
        """
        Initialize a new IngestControlMessage instance with empty tasks, metadata,
        timestamps, configuration, and an empty DataFrame payload.
        """
        self._tasks: Dict[str, "ControlMessageTask"] = {}
        self._metadata: Dict[str, Any] = {}
        self._timestamps: Dict[str, datetime] = {}
        self._payload: pd.DataFrame = pd.DataFrame()
        self._config: Dict[str, Any] = {}

    def add_task(self, task: "ControlMessageTask"):
        """
        Add a task to the control message, keyed by the task's unique 'id'.

        Parameters
        ----------
        task : ControlMessageTask
            The task to register. Only its ``id`` attribute is read here.

        Raises
        ------
        ValueError
            If a task with the same 'id' already exists.
        """
        if task.id in self._tasks:
            raise ValueError(f"Task with id '{task.id}' already exists. Tasks must be unique.")
        self._tasks[task.id] = task

    def get_tasks(self) -> Generator["ControlMessageTask", None, None]:
        """
        Return all tasks as a generator.

        The tasks are snapshotted when iteration starts, so callers may add or remove
        tasks on this message while consuming the generator. Iterating the live
        dictionary view instead would raise ``RuntimeError`` as soon as the dictionary
        changes size.
        """
        yield from list(self._tasks.values())

    def has_task(self, task_id: str) -> bool:
        """
        Check if a task with the given ID exists.
        """
        return task_id in self._tasks

    def remove_task(self, task_id: str) -> None:
        """
        Remove a task from the control message. Logs a warning if the task does not exist.
        """
        if task_id in self._tasks:
            del self._tasks[task_id]
        else:
            # Lazy %-style arguments: the message is only formatted if the record is emitted.
            logger.warning("Attempted to remove non-existent task with id: %s", task_id)

    def config(self, config: Union[Dict[str, Any], None] = None) -> Dict[str, Any]:
        """
        Get or update the control message configuration.

        If 'config' is provided, it must be a dictionary. The configuration is updated with the
        provided values. If no argument is provided, returns a copy of the current configuration.

        Returns
        -------
        Dict[str, Any]
            A shallow copy of the (possibly updated) configuration.

        Raises
        ------
        ValueError
            If the provided configuration is not a dictionary.
        """
        if config is None:
            return self._config.copy()

        if not isinstance(config, dict):
            raise ValueError("Configuration must be provided as a dictionary.")

        self._config.update(config)
        return self._config.copy()

    def copy(self) -> "IngestControlMessage":
        """
        Create a deep copy of this control message.
        """
        return copy.deepcopy(self)

    def get_metadata(self, key: Union[str, re.Pattern, None] = None, default_value: Any = None) -> Any:
        """
        Retrieve metadata. If 'key' is None, returns a copy of all metadata.

        Parameters
        ----------
        key : str or re.Pattern, optional
            If a string is provided, returns the value for that exact key.
            If a regex pattern is provided, returns a dictionary of all metadata key-value pairs
            where the key matches the regex. If no matches are found, returns default_value.
        default_value : Any, optional
            The value to return if the key is not found or no regex matches.

        Returns
        -------
        Any
            The metadata value for an exact string key, or a dict of matching metadata if a regex is provided.
        """
        if key is None:
            return self._metadata.copy()

        # If key is a regex pattern (i.e. has a search method), perform pattern matching.
        if hasattr(key, "search"):
            matches = {k: v for k, v in self._metadata.items() if key.search(k)}
            return matches if matches else default_value

        # Otherwise, perform an exact lookup.
        return self._metadata.get(key, default_value)

    def has_metadata(self, key: Union[str, re.Pattern]) -> bool:
        """
        Check if a metadata key exists.

        Parameters
        ----------
        key : str or re.Pattern
            If a string is provided, checks for the exact key.
            If a regex pattern is provided, returns True if any metadata key matches the regex.

        Returns
        -------
        bool
            True if the key (or any matching key, in case of a regex) exists, False otherwise.
        """
        if hasattr(key, "search"):
            return any(key.search(k) for k in self._metadata)
        return key in self._metadata

    def list_metadata(self) -> list:
        """
        List all metadata keys.
        """
        return list(self._metadata)

    def set_metadata(self, key: str, value: Any) -> None:
        """
        Set a metadata key-value pair, overwriting any existing value for the key.
        """
        self._metadata[key] = value

    def filter_timestamp(self, regex_filter: str) -> Dict[str, datetime]:
        """
        Retrieve timestamps whose keys match the regex filter.

        The filter is applied with ``search`` semantics, so it may match anywhere in the key.
        """
        pattern = re.compile(regex_filter)
        return {key: ts for key, ts in self._timestamps.items() if pattern.search(key)}

    def get_timestamp(self, key: str, fail_if_nonexist: bool = False) -> Union[datetime, None]:
        """
        Retrieve a timestamp for a given key.

        Returns
        -------
        datetime or None
            The stored timestamp, or None if the key is not found and
            'fail_if_nonexist' is False.

        Raises
        ------
        KeyError
            If the key is not found and 'fail_if_nonexist' is True.
        """
        if key in self._timestamps:
            return self._timestamps[key]
        if fail_if_nonexist:
            raise KeyError(f"Timestamp for key '{key}' does not exist.")
        return None

    def get_timestamps(self) -> Dict[str, datetime]:
        """
        Retrieve all timestamps as a shallow copy of the internal mapping.
        """
        return self._timestamps.copy()

    def set_timestamp(self, key: str, timestamp: Any) -> None:
        """
        Set a timestamp for a given key. Accepts either a datetime object or an ISO format string.

        A string ending in the 'Z' UTC designator (e.g. ``2025-01-01T12:00:00Z``) is accepted on
        every Python version; it is stored as a timezone-aware datetime with a zero UTC offset.

        Raises
        ------
        ValueError
            If the provided timestamp is neither a datetime object nor a valid ISO format string.
        """
        if isinstance(timestamp, datetime):
            self._timestamps[key] = timestamp
            return

        if not isinstance(timestamp, str):
            raise ValueError("timestamp must be a datetime object or ISO format string")

        try:
            parsed = datetime.fromisoformat(timestamp)
        except ValueError as e:
            parsed = None
            # datetime.fromisoformat() only understands a trailing "Z" from Python 3.11 on.
            # Retry with the equivalent explicit offset so older interpreters behave the same.
            if timestamp.endswith("Z"):
                try:
                    candidate = datetime.fromisoformat(timestamp[:-1] + "+00:00")
                except ValueError:
                    candidate = None
                # Only accept the retry if the offset was really parsed as a UTC offset;
                # a naive result means the substituted text was interpreted as something else.
                if candidate is not None and candidate.tzinfo is not None:
                    parsed = candidate
            if parsed is None:
                raise ValueError(f"Invalid timestamp format: {timestamp}") from e

        self._timestamps[key] = parsed

    def payload(self, payload: Union[pd.DataFrame, None] = None) -> pd.DataFrame:
        """
        Get or set the payload DataFrame.

        With no argument, returns the current payload (the stored object, not a copy).
        With a DataFrame argument, stores it as the new payload and returns it.

        Raises
        ------
        ValueError
            If the provided payload is not a pandas DataFrame.
        """
        if payload is None:
            return self._payload

        if not isinstance(payload, pd.DataFrame):
            raise ValueError("Payload must be a pandas DataFrame")

        self._payload = payload
        return self._payload
|
|
@@ -4,8 +4,12 @@ README.md
|
|
|
4
4
|
pyproject.toml
|
|
5
5
|
src/__init__.py
|
|
6
6
|
src/version.py
|
|
7
|
+
src/nv_ingest_api/__init__.py
|
|
7
8
|
src/nv_ingest_api.egg-info/PKG-INFO
|
|
8
9
|
src/nv_ingest_api.egg-info/SOURCES.txt
|
|
9
10
|
src/nv_ingest_api.egg-info/dependency_links.txt
|
|
10
11
|
src/nv_ingest_api.egg-info/requires.txt
|
|
11
|
-
src/nv_ingest_api.egg-info/top_level.txt
|
|
12
|
+
src/nv_ingest_api.egg-info/top_level.txt
|
|
13
|
+
src/nv_ingest_api/primitives/__init__.py
|
|
14
|
+
src/nv_ingest_api/primitives/control_message_task.py
|
|
15
|
+
src/nv_ingest_api/primitives/ingest_control_message.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
nv_ingest_api
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
|
{nv_ingest_api-2025.2.17.dev20250217233316 → nv_ingest_api-2025.2.18.dev20250218233328}/LICENSE
RENAMED
|
File without changes
|
{nv_ingest_api-2025.2.17.dev20250217233316 → nv_ingest_api-2025.2.18.dev20250218233328}/MANIFEST.in
RENAMED
|
File without changes
|
{nv_ingest_api-2025.2.17.dev20250217233316 → nv_ingest_api-2025.2.18.dev20250218233328}/README.md
RENAMED
|
File without changes
|
|
File without changes
|
{nv_ingest_api-2025.2.17.dev20250217233316 → nv_ingest_api-2025.2.18.dev20250218233328}/setup.cfg
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|