nominal-streaming 0.7.9__cp310-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nominal_streaming/__init__.py +4 -0
- nominal_streaming/_nominal_streaming.abi3.so +0 -0
- nominal_streaming/_nominal_streaming.pyi +379 -0
- nominal_streaming/nominal_dataset_stream.py +301 -0
- nominal_streaming/py.typed +0 -0
- nominal_streaming-0.7.9.dist-info/METADATA +73 -0
- nominal_streaming-0.7.9.dist-info/RECORD +8 -0
- nominal_streaming-0.7.9.dist-info/WHEEL +4 -0
nominal_streaming/_nominal_streaming.abi3.so
Binary file

nominal_streaming/_nominal_streaming.pyi
@@ -0,0 +1,379 @@
+from __future__ import annotations
+
+import pathlib
+from types import TracebackType
+from typing import Sequence, Type
+
+from typing_extensions import Self
+
+from nominal_streaming.nominal_dataset_stream import DataType
+
+class PyNominalStreamOpts:
+    """Configuration options for Nominal data streaming.
+
+    This class configures how data points are batched, buffered, and dispatched
+    to the Nominal backend. It mirrors the Rust `NominalStreamOpts` structure,
+    providing Pythonic accessors and fluent builder-style methods.
+    """
+
+    def __init__(
+        self,
+        *,
+        max_points_per_batch: int = 250_000,
+        max_request_delay_secs: float = 0.1,
+        max_buffered_requests: int = 4,
+        num_upload_workers: int = 8,
+        num_runtime_workers: int = 8,
+        base_api_url: str = "https://api.gov.nominal.io/api",
+    ) -> None:
+        """Initialize a PyNominalStreamOpts instance.
+
+        Args:
+            max_points_per_batch: Maximum number of points per record before dispatching a request.
+            max_request_delay_secs: Maximum delay before a request is sent, even if it results in a partial request.
+            max_buffered_requests: Maximum number of buffered requests before applying backpressure.
+            num_upload_workers: Number of concurrent network dispatches to perform.
+                NOTE: should not exceed `num_runtime_workers`.
+            num_runtime_workers: Number of runtime worker threads for concurrent processing.
+            base_api_url: Base URL of the Nominal API endpoint to stream data to.
+        """
+
+    @property
+    def max_points_per_batch(self) -> int:
+        """Maximum number of data points per record before dispatch.
+
+        Returns:
+            The configured upper bound on points per record.
+
+        Example:
+            >>> PyNominalStreamOpts().max_points_per_batch
+            250000
+        """
+
+    @property
+    def max_request_delay_secs(self) -> float:
+        """Maximum delay before forcing a request flush.
+
+        Returns:
+            The maximum time to wait before sending pending data, in seconds.
+
+        Example:
+            >>> PyNominalStreamOpts().max_request_delay_secs > 0
+            True
+        """
+
+    @property
+    def max_buffered_requests(self) -> int:
+        """Maximum number of requests that may be buffered concurrently.
+
+        Returns:
+            The maximum number of buffered requests before backpressure is applied.
+
+        Example:
+            >>> PyNominalStreamOpts().max_buffered_requests >= 0
+            True
+        """
+
+    @property
+    def num_upload_workers(self) -> int:
+        """Number of concurrent dispatcher tasks used for network transmission.
+
+        Returns:
+            The number of dispatcher tasks.
+
+        Example:
+            >>> PyNominalStreamOpts().num_upload_workers >= 1
+            True
+        """
+
+    @property
+    def num_runtime_workers(self) -> int:
+        """Number of runtime worker threads for internal processing.
+
+        Returns:
+            The configured number of runtime workers.
+
+        Example:
+            >>> PyNominalStreamOpts().num_runtime_workers
+            8
+        """
+
+    @property
+    def base_api_url(self) -> str:
+        """Base URL for the Nominal API endpoint.
+
+        Returns:
+            The fully-qualified base API URL used for streaming requests.
+
+        Example:
+            >>> isinstance(PyNominalStreamOpts().base_api_url, str)
+            True
+        """
+
+    def with_max_points_per_batch(self, n: int) -> Self:
+        """Set the maximum number of points per record.
+
+        Args:
+            n: Maximum number of data points to include in a single record.
+
+        Returns:
+            The updated instance for fluent chaining.
+
+        Example:
+            >>> opts = PyNominalStreamOpts().with_max_points_per_batch(1000)
+        """
+
+    def with_max_request_delay_secs(self, delay_secs: float) -> Self:
+        """Set the maximum delay before forcing a request flush.
+
+        Args:
+            delay_secs: Maximum time in seconds to wait before sending pending data.
+
+        Returns:
+            The updated instance for fluent chaining.
+
+        Example:
+            >>> opts = PyNominalStreamOpts().with_max_request_delay_secs(1.0)
+        """
+
+    def with_max_buffered_requests(self, n: int) -> Self:
+        """Set the maximum number of requests that can be buffered concurrently.
+
+        Args:
+            n: Maximum number of buffered requests.
+
+        Returns:
+            The updated instance for fluent chaining.
+
+        Example:
+            >>> opts = PyNominalStreamOpts().with_max_buffered_requests(200)
+        """
+
+    def with_num_upload_workers(self, n: int) -> Self:
+        """Set the number of asynchronous dispatcher tasks.
+
+        Args:
+            n: Number of dispatcher tasks responsible for request transmission.
+
+        Returns:
+            The updated instance for fluent chaining.
+
+        Example:
+            >>> opts = PyNominalStreamOpts().with_num_upload_workers(8)
+        """
+
+    def with_num_runtime_workers(self, n: int) -> Self:
+        """Set the number of runtime worker threads.
+
+        Args:
+            n: Number of background worker threads used for internal processing.
+
+        Returns:
+            The updated instance for fluent chaining.
+
+        Example:
+            >>> opts = PyNominalStreamOpts().with_num_runtime_workers(16)
+        """
+
+    def with_api_base_url(self, url: str) -> Self:
+        """Set the base URL for the Nominal API.
+
+        Args:
+            url: Fully-qualified base API URL for streaming requests.
+
+        Returns:
+            The updated instance for fluent chaining.
+
+        Example:
+            >>> opts = PyNominalStreamOpts().with_api_base_url("https://staging.nominal.io")
+        """
+
+    def __repr__(self) -> str:
+        """Return a developer-friendly string representation of this configuration."""
+
+    def __str__(self) -> str:
+        """Return a human-readable summary of this configuration."""
+
+class PyNominalDatasetStream:
+    """High-throughput client for enqueueing dataset points to Nominal.
+
+    This is the Python-facing streaming client. It supports a fluent builder
+    API for configuration, lifecycle controls (`open`, `close`, `cancel`), and
+    multiple enqueue modes (single point, long series, and wide records).
+    """
+
+    def __init__(self, /, opts: PyNominalStreamOpts | None = None) -> None:
+        """Create a new stream builder.
+
+        Args:
+            opts: Optional stream options. If omitted, sensible defaults are used.
+
+        Example:
+            >>> from nominal_streaming import PyNominalStreamOpts
+            >>> stream = PyNominalDatasetStream(PyNominalStreamOpts())
+        """
+
+    def enable_logging(self, log_directive: str | None = None) -> Self:
+        """Enable client-side logging for diagnostics.
+
+        NOTE: must be applied before calling open().
+
+        Args:
+            log_directive: If provided, log directive (e.g. "trace" or "info") to configure logging with.
+                If not provided, searches for a `RUST_LOG` environment variable, or, if not found,
+                defaults to debug-level logging.
+
+        Returns:
+            The updated instance for fluent chaining.
+        """
+
+    def with_options(self, opts: PyNominalStreamOpts) -> Self:
+        """Attach or replace stream options.
+
+        NOTE: must be applied before calling open().
+
+        Args:
+            opts: Options for the underlying stream.
+
+        Returns:
+            The updated instance for fluent chaining.
+        """
+
+    def with_core_consumer(
+        self,
+        dataset_rid: str,
+        token: str | None = None,
+    ) -> Self:
+        """Send data to a Dataset in Nominal.
+
+        NOTE: must be applied before calling open().
+
+        NOTE: mutually exclusive with `to_file`.
+
+        Args:
+            dataset_rid: Resource identifier of the dataset.
+            token: Optional bearer token. If omitted, uses the `NOMINAL_TOKEN` environment variable.
+
+        Returns:
+            The updated instance for fluent chaining.
+
+        Raises:
+            RuntimeError: If called after `to_file`.
+        """
+
+    def to_file(self, path: pathlib.Path) -> Self:
+        """Write points to a local file (newline-delimited records).
+
+        Mutually exclusive with `with_core_consumer`.
+
+        Args:
+            path: Destination file path.
+
+        Returns:
+            The updated instance for fluent chaining.
+
+        Raises:
+            RuntimeError: If already configured for core consumption.
+        """
+
+    def with_file_fallback(self, path: pathlib.Path) -> Self:
+        """If sending to core fails, fall back to writing to `path`.
+
+        NOTE: requires that `with_core_consumer` has been configured.
+
+        NOTE: not allowed with `to_file`.
+
+        Args:
+            path: Fallback file path.
+
+        Returns:
+            The updated instance for fluent chaining.
+
+        Raises:
+            RuntimeError: If core consumer is not configured.
+        """
+
+    def open(self) -> None:
+        """Start the runtime and accept enqueues.
+
+        NOTE: safe to call multiple times; subsequent calls are no-ops.
+
+        NOTE: may raise if the builder is not fully configured.
+        """
+
+    def close(self) -> None:
+        """Gracefully drain pending data and stop the worker runtime.
+
+        NOTE: blocks while joining internal threads. Safe to call multiple times.
+        """
+
+    def cancel(self) -> None:
+        """Cancel outstanding work quickly, without guaranteeing a full drain.
+
+        NOTE: intended for signal handlers or rapid shutdown paths.
+        """
+
+    def enqueue(
+        self,
+        channel_name: str,
+        timestamp: int,
+        value: DataType,
+        tags: dict[str, str] | None = None,
+    ) -> None:
+        """Enqueue a single point.
+
+        Args:
+            channel_name: Channel name to stream to.
+            timestamp: Timestamp for the enqueued value. Accepts either integral nanoseconds
+                since the unix epoch or a datetime, which is presumed to be in UTC.
+            value: Data value to stream.
+            tags: Optional tags to attach to the data.
+
+        Raises:
+            RuntimeError: If the stream is not open or has been cancelled.
+            TypeError: If `value` is not an `int`, `float`, or `str`.
+        """
+
+    def enqueue_batch(
+        self,
+        channel_name: str,
+        timestamps: Sequence[int],
+        values: Sequence[DataType],
+        tags: dict[str, str] | None = None,
+    ) -> None:
+        """Enqueue a series for a single channel.
+
+        Args:
+            channel_name: Channel name.
+            timestamps: Sequence of timestamps (same accepted forms as in `enqueue`).
+            values: Sequence of values (must be homogeneous: all `int`, all `float`, or all `str`).
+            tags: Optional tags to attach to the values.
+
+        Raises:
+            RuntimeError: If the stream is not open or has been cancelled.
+            TypeError: If value types are heterogeneous or unsupported.
+            ValueError: If the lengths of `timestamps` and `values` differ.
+        """
+
+    def enqueue_from_dict(
+        self,
+        timestamp: int,
+        channel_values: dict[str, DataType],
+        tags: dict[str, str] | None = None,
+    ) -> None:
+        """Enqueue a wide record: many channels at a single timestamp.
+
+        Args:
+            timestamp: Record timestamp (see `enqueue`).
+            channel_values: Mapping from channel name to value.
+            tags: Optional tags to attach to all values in the record.
+
+        Raises:
+            RuntimeError: If the stream is not open or has been cancelled.
+            TypeError: If any value is not an `int`, `float`, or `str`.
+        """
+
+    def __enter__(self) -> Self: ...
+    def __exit__(
+        self, exc_type: Type[BaseException] | None, exc_value: BaseException | None, traceback: TracebackType | None
+    ) -> None: ...
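Read together, the stub implies a build-then-open lifecycle: configure a `PyNominalStreamOpts`, attach a consumer, then `open()`, `enqueue(...)`, and `close()`. Below is a minimal sketch against the raw binding, assuming the wheel is installed and `NOMINAL_TOKEN` is exported (per the `with_core_consumer` docstring); the dataset RID is a placeholder.

```python
# Minimal lifecycle sketch against the raw binding, per the stub above.
# Assumes the wheel is installed and NOMINAL_TOKEN is exported; the
# dataset RID is a placeholder, not a real identifier.
import time

from nominal_streaming._nominal_streaming import (
    PyNominalDatasetStream,
    PyNominalStreamOpts,
)

# Builder methods return the updated instance, so they chain fluently.
opts = PyNominalStreamOpts().with_max_points_per_batch(10_000).with_max_request_delay_secs(0.5)

stream = PyNominalDatasetStream(opts).with_core_consumer("ri.catalog.dataset.<placeholder>")
stream.open()  # starts the runtime; must come after all builder calls
try:
    # Timestamps are integral nanoseconds since the unix epoch.
    stream.enqueue("temperature", int(time.time() * 1e9), 21.5, tags={"site": "a1"})
finally:
    stream.close()  # drains pending data and joins worker threads
```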
nominal_streaming/nominal_dataset_stream.py
@@ -0,0 +1,301 @@
+"""Python-facing API for the streaming client.
+
+Example:
+-------
+import pathlib
+from datetime import datetime, timezone
+
+from nominal_streaming import PyNominalStreamOpts, NominalDatasetStream
+
+# NOTE: may also use PyNominalStreamOpts() for sensible defaults that may be customized
+opts = PyNominalStreamOpts(
+    max_points_per_batch=250_000,
+    max_request_delay_secs=0.1,
+    max_buffered_requests=4,
+    num_upload_workers=8,
+    num_runtime_workers=8,
+    base_api_url="https://api.gov.nominal.io/api",
+)
+
+with (
+    NominalDatasetStream("api_key", opts)
+    .with_core_consumer("ri.catalog.dataset...")
+    .with_file_fallback(pathlib.Path("/tmp/fallback.avro")) as stream
+):
+    stream.enqueue("chanA", datetime.now(timezone.utc), 1.23, tags={"site": "a1"})
+    stream.enqueue_batch("chanB", [0, 1_000_000_000], [5, 6], tags={"phase": "prod"})
+    stream.enqueue_from_dict(0, {"chanC": "ok", "chanD": 7}, tags={"who": "tester"})
+
+"""
+
+from __future__ import annotations
+
+import datetime
+import logging
+import pathlib
+import signal
+from types import TracebackType
+from typing import Mapping, Sequence, Type
+
+import dateutil.parser
+from typing_extensions import Self
+
+from nominal_streaming._nominal_streaming import (
+    PyNominalDatasetStream,
+    PyNominalStreamOpts,
+)
+
+logger = logging.getLogger(__name__)
+
+TimestampLike = str | int | datetime.datetime
+DataType = int | float | str
+
+
+def _parse_timestamp(ts: str | int | datetime.datetime) -> int:
+    if isinstance(ts, int):
+        return ts
+    elif isinstance(ts, datetime.datetime):
+        secs = ts.astimezone(datetime.timezone.utc).timestamp()
+        return int(secs * 1e9)
+    else:
+        # TODO(drake): by involving dateutil, this chops off any nano-level precision provided
+        # in the timestamp. Update to not lose precision when converting to absolute nanos.
+        secs = dateutil.parser.parse(ts).astimezone(datetime.timezone.utc).timestamp()
+        return int(secs * 1e9)
+
+
+class NominalDatasetStream:
+    """Top-level Python wrapper for the Rust streaming client to Nominal."""
+
+    def __init__(self, auth_header: str, opts: PyNominalStreamOpts):
+        """Initializer for the dataset stream.
+
+        Args:
+            auth_header: API key or access token for the Nominal API.
+            opts: Options for the underlying stream.
+        """
+        self._auth_header = auth_header
+        self._opts = opts
+        self._impl = PyNominalDatasetStream(self._opts)
+        self._old_sigint = None
+
+    @classmethod
+    def create(
+        cls,
+        auth_header: str,
+        base_api_url: str,
+        max_points_per_batch: int = 250_000,
+        max_request_delay_secs: float = 0.1,
+        max_buffered_requests: int = 4,
+        num_upload_workers: int = 8,
+        num_runtime_workers: int = 8,
+    ) -> Self:
+        """Factory constructor to build a NominalDatasetStream using optional overrides for configuration options.
+
+        Args:
+            auth_header: API Key or Personal Access Token for accessing the Nominal API.
+            base_api_url: Base API URL to hit the Nominal API with.
+            max_points_per_batch: Overrides the default number of points that may be sent in a single batch.
+            max_request_delay_secs: Overrides the default maximum buffering time for data between flushes.
+                NOTE: if the amount of data being streamed is greater than available bandwidth, data may be
+                buffered longer than the configured duration.
+            max_buffered_requests: Overrides the default number of requests that may be buffered between encoding
+                threads and upload threads. Increasing this may prevent blocking threads in situations with spotty
+                internet, but may increase teardown time (e.g. when pressing Ctrl+C).
+            num_upload_workers: Overrides the default number of upload worker threads.
+                NOTE: must not exceed the number of runtime workers.
+            num_runtime_workers: Overrides the default number of runtime worker threads.
+                NOTE: must be at least the number of upload workers.
+        """
+        opts = PyNominalStreamOpts(
+            max_points_per_batch=max_points_per_batch,
+            max_request_delay_secs=max_request_delay_secs,
+            max_buffered_requests=max_buffered_requests,
+            num_upload_workers=num_upload_workers,
+            num_runtime_workers=num_runtime_workers,
+            base_api_url=base_api_url,
+        )
+        return cls(auth_header, opts)
+
+    def enable_logging(self, log_directive: str = "debug") -> Self:
+        """Enable logging with the given verbosity level.
+
+        Args:
+            log_directive: Log verbosity level to expose from Rust code. Defaults to verbose debug logging.
+                See the following for valid values: https://docs.rs/env_logger/latest/env_logger/#enabling-logging
+        """
+        logger.info("Setting rust log verbosity to '%s'", log_directive)
+        self._impl = self._impl.enable_logging(log_directive)
+        return self
+
+    def with_core_consumer(self, dataset_rid: str) -> Self:
+        """Enable streaming to a Dataset in Core.
+
+        Args:
+            dataset_rid: RID of the Dataset in Nominal to stream to.
+        """
+        self._impl = self._impl.with_core_consumer(dataset_rid, self._auth_header)
+        return self
+
+    def to_file(self, path: pathlib.Path) -> Self:
+        """Target streaming towards a local `.avro` file.
+
+        The written file will contain snappy-compressed avro data. This can be read as follows:
+
+        ```python
+        from fastavro import reader
+
+        with open("test.avro", "rb") as f:
+            for record in reader(f):
+                channel_name = record["channel"]
+                tags = record["tags"]
+                timestamps = record["timestamps"]
+                values = record["values"]
+        ```
+        """
+        self._impl = self._impl.to_file(path)
+        return self
+
+    def with_file_fallback(self, path: pathlib.Path) -> Self:
+        """Set up a file fallback for streaming to core.
+
+        The written file will contain snappy-compressed avro data for any batches of data that were unable to make
+        it to the backend successfully. This can be read as follows:
+
+        ```python
+        from fastavro import reader
+
+        with open("test.avro", "rb") as f:
+            for record in reader(f):
+                channel_name = record["channel"]
+                tags = record["tags"]
+                timestamps = record["timestamps"]
+                values = record["values"]
+        ```
+        """
+        self._impl = self._impl.with_file_fallback(path)
+        return self
+
+    def open(self) -> Self:
+        """Open the underlying stream and begin accepting enqueues.
+
+        NOTE: installs a sigint handler to enable more graceful shutdown.
+        This is restored upon exit.
+        """
+        if self._old_sigint is not None:
+            raise RuntimeError("Stream already opened!")
+
+        logger.info("Opening underlying stream")
+        self._impl.open()
+
+        # Map Ctrl+C → fast cancel; keep handler tiny and re-raise KeyboardInterrupt.
+        def _on_sigint(signum, frame):  # type: ignore[no-untyped-def]
+            logger.debug("Cancelling underlying stream")
+            try:
+                self._impl.cancel()
+            finally:
+                raise KeyboardInterrupt
+
+        logger.info("Installing sigint handler")
+        self._old_sigint = signal.getsignal(signal.SIGINT)  # type: ignore[assignment]
+        signal.signal(signal.SIGINT, _on_sigint)
+        return self
+
+    def __enter__(self) -> Self:
+        """Open the stream as a context manager.
+
+        NOTE: installs a sigint handler to enable more graceful shutdown.
+        This is restored upon exit.
+        """
+        return self.open()
+
+    def close(self, wait: bool = True) -> None:
+        """Exit the stream and close out any used system resources.
+
+        NOTE: uninstalls the installed sigint handler and restores any pre-existing sigint handlers.
+        """
+        try:
+            if wait:
+                logger.info("Awaiting graceful shutdown")
+                self._impl.close()
+            else:
+                logger.info("Quickly shutting down")
+                self._impl.cancel()
+        finally:
+            if self._old_sigint is not None:
+                logger.info("Restoring original sigint handler")
+                signal.signal(signal.SIGINT, self._old_sigint)
+                self._old_sigint = None
+
+    def __exit__(
+        self,
+        exc_type: Type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> None:
+        """Exit the stream and close out any used system resources.
+
+        NOTE: uninstalls the installed sigint handler and restores any pre-existing sigint handlers.
+        """
+        self.close()
+
+    def enqueue(
+        self,
+        channel_name: str,
+        timestamp: TimestampLike,
+        value: DataType,
+        tags: Mapping[str, str] | None = None,
+    ) -> None:
+        """Write a single value to the stream.
+
+        Args:
+            channel_name: Name of the channel to upload data for.
+            timestamp: Absolute UTC timestamp of the data being uploaded.
+            value: Value to write to the specified channel.
+            tags: Key-value tags associated with the data being uploaded.
+        """
+        self._impl.enqueue(channel_name, _parse_timestamp(timestamp), value, {**tags} if tags else None)
+
+    def enqueue_batch(
+        self,
+        channel_name: str,
+        timestamps: Sequence[TimestampLike],
+        values: Sequence[DataType],
+        tags: Mapping[str, str] | None = None,
+    ) -> None:
+        """Add a sequence of messages to the queue to upload to Nominal.
+
+        Messages are added one-by-one (with timestamp normalization) and flushed
+        based on the batch conditions.
+
+        NOTE: assumes that all values have the same type as the first value in the batch;
+        ensure that any provided value arrays are homogeneously typed.
+
+        Args:
+            channel_name: Name of the channel to upload data for.
+            timestamps: Absolute UTC timestamps of the data being uploaded.
+            values: Values to write to the specified channel.
+            tags: Key-value tags associated with the data being uploaded.
+        """
+        self._impl.enqueue_batch(
+            channel_name, [_parse_timestamp(ts) for ts in timestamps], values, {**tags} if tags else None
+        )
+
+    def enqueue_from_dict(
+        self,
+        timestamp: TimestampLike,
+        channel_values: Mapping[str, DataType],
+        tags: Mapping[str, str] | None = None,
+    ) -> None:
+        """Write multiple channel values at a given timestamp using a flattened dictionary.
+
+        Each key in the dictionary is treated as a channel name and the corresponding value
+        is enqueued with the given timestamp.
+
+        Args:
+            timestamp: The shared absolute UTC timestamp to use for all items to enqueue.
+            channel_values: A dictionary mapping channel names to their respective values.
+            tags: Key-value tags associated with the data being uploaded.
+        """
+        self._impl.enqueue_from_dict(_parse_timestamp(timestamp), {**channel_values}, {**tags} if tags else None)
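The wrapper's main ergonomic addition over the raw binding is timestamp normalization: `_parse_timestamp` passes integers through as epoch nanoseconds, converts `datetime` objects via UTC, and parses strings with `dateutil`. A quick illustration of the three `TimestampLike` forms follows; it pokes at a private helper purely for demonstration, and only the public `enqueue*` methods are supported API.

```python
# Demonstration of the three TimestampLike forms normalized by the wrapper.
# Calls the private _parse_timestamp helper directly, for illustration only.
import datetime

from nominal_streaming.nominal_dataset_stream import _parse_timestamp

ns = 1_700_000_000_000_000_000  # 2023-11-14T22:13:20Z as epoch nanoseconds

# Integers pass through untouched.
assert _parse_timestamp(ns) == ns

# Datetimes are converted to UTC, then to epoch nanoseconds (float-second
# arithmetic, so sub-microsecond digits can be lost).
dt = datetime.datetime.fromtimestamp(ns / 1e9, tz=datetime.timezone.utc)
assert _parse_timestamp(dt) == ns

# Strings go through dateutil and likewise lose nano-level precision
# (see the TODO in the source above).
assert _parse_timestamp("2023-11-14T22:13:20Z") == ns
```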
nominal_streaming/py.typed
File without changes

nominal_streaming-0.7.9.dist-info/METADATA
@@ -0,0 +1,73 @@
+Metadata-Version: 2.4
+Name: nominal-streaming
+Version: 0.7.9
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Rust
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Dist: typing-extensions>=4,<5
+Requires-Dist: python-dateutil>=0.0.0
+Summary: Python bindings for the Nominal Rust streaming client
+License: MIT
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
+Project-URL: Source Code, https://github.com/nominal-io/nominal-streaming
+
+# nominal-streaming Python Bindings
+
+`nominal-streaming` is a thin Python wrapper around the existing [nominal-streaming Rust crate](https://crates.io/crates/nominal-streaming).
+Usage semantics remain largely the same, with some slight alterations to allow for a more Pythonic interface.
+
+The library aims to balance three concerns:
+
+1. Data should exist in-memory only for a limited, configurable amount of time before it is sent to Core.
+1. Writes should fall back to disk if there are network failures.
+1. Backpressure should be applied to incoming requests when network throughput is saturated.
+
+This library streams data to Nominal Core, to a file, or to Nominal Core with a file as backup (recommended, to protect against network failures).
+It also provides configuration to manage the tradeoff between the concerns listed above.
+
+> [!WARNING]
+> This library is still under active development and may introduce breaking changes.
+
+## Usage example: streaming from memory to Nominal Core with file fallback
+
+```python
+import pathlib
+import time
+
+from nominal_streaming import NominalDatasetStream, PyNominalStreamOpts
+
+if __name__ == "__main__":
+    num_points = 100_000
+    stream = (
+        NominalDatasetStream(
+            auth_header="<api key>",
+            opts=PyNominalStreamOpts(),
+        )
+        .enable_logging("info")  # can set debug, warn, etc.
+        .with_core_consumer("<dataset rid>")
+        .with_file_fallback(pathlib.Path("local_fallback.avro"))
+    )
+
+    with stream:
+        # Stream 100_000 live readings (made-up values)
+        for idx in range(num_points):
+            time_ns = int(time.time() * 1e9)
+            value = (idx % 50) + 0.5
+            stream.enqueue("channel_name", time_ns, value, tags={"tag_key": "tag_value"})
+
+        # Stream 100_000 points in one batch
+        start_time = int(time.time() * 1e9)
+        timestamp_offset = int(1e9 / 1600)
+        timestamps = [start_time + timestamp_offset * idx for idx in range(num_points)]
+        values = [(idx % 50) + 0.5 for idx in range(num_points)]
+        stream.enqueue_batch(
+            "channel_name",
+            timestamps,
+            values,
+            tags={"tag_key": "tag_value"},
+        )
+```
+
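The README example covers the Core-with-fallback mode. For the file-only mode mentioned above, a hedged sketch follows; the empty `auth_header` is an assumption (nothing is sent over the network in this mode), and the path and channel name are illustrative.

```python
# Sketch of the file-only streaming mode described above; no data leaves the machine.
# Assumption: auth_header is unused when only to_file is configured, so an empty
# string is passed here. Paths and channel names are illustrative.
import pathlib
import time

from nominal_streaming import NominalDatasetStream, PyNominalStreamOpts

stream = (
    NominalDatasetStream(auth_header="", opts=PyNominalStreamOpts())
    .to_file(pathlib.Path("local_capture.avro"))
)

with stream:
    for idx in range(1_000):
        stream.enqueue("channel_name", int(time.time() * 1e9), float(idx))
```

The resulting file contains snappy-compressed Avro and can be read back with `fastavro` exactly as shown in the `to_file` docstring.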
nominal_streaming-0.7.9.dist-info/RECORD
@@ -0,0 +1,8 @@
+nominal_streaming-0.7.9.dist-info/METADATA,sha256=U9EAg2XGHXpwtWdLIq6J5SvGO9qnklcdc2TlfVipsUM,2814
+nominal_streaming-0.7.9.dist-info/WHEEL,sha256=M-9xdoymxPeMYqxLovGnzRp1139eX1kg457fDCQ3bmM,128
+nominal_streaming/__init__.py,sha256=7isEI7NFTEnTNEmxjtDgehQQPPRM-ByrH903VBWrulQ,202
+nominal_streaming/_nominal_streaming.abi3.so,sha256=tSkq8_gy40vSl2iviImZd9lgVVoAqir0RQlefOCsXLg,6681308
+nominal_streaming/_nominal_streaming.pyi,sha256=hZ5TpAFFMZzajSu13yh-klEh81ItvF5C5f_3i0rMfkM,12093
+nominal_streaming/nominal_dataset_stream.py,sha256=0OXLiGDhyT9Q4z0QJlXaV7H3Nd3OWMPi2bjQJKcM5eE,11507
+nominal_streaming/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nominal_streaming-0.7.9.dist-info/RECORD,,