feldera 0.69.0__py3-none-any.whl → 0.189.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of feldera might be problematic. Click here for more details.
- feldera/__init__.py +3 -0
- feldera/_callback_runner.py +64 -85
- feldera/_helpers.py +8 -2
- feldera/enums.py +222 -116
- feldera/output_handler.py +16 -4
- feldera/pipeline.py +718 -261
- feldera/pipeline_builder.py +42 -19
- feldera/rest/_helpers.py +40 -0
- feldera/rest/_httprequests.py +365 -192
- feldera/rest/config.py +44 -30
- feldera/rest/errors.py +16 -0
- feldera/rest/feldera_client.py +694 -153
- feldera/rest/pipeline.py +16 -1
- feldera/runtime_config.py +32 -5
- feldera/stats.py +152 -0
- feldera/tests/test_datafusionize.py +38 -0
- feldera/testutils.py +382 -0
- feldera/testutils_oidc.py +368 -0
- feldera-0.189.0.dist-info/METADATA +163 -0
- feldera-0.189.0.dist-info/RECORD +26 -0
- feldera-0.69.0.dist-info/METADATA +0 -105
- feldera-0.69.0.dist-info/RECORD +0 -21
- {feldera-0.69.0.dist-info → feldera-0.189.0.dist-info}/WHEEL +0 -0
- {feldera-0.69.0.dist-info → feldera-0.189.0.dist-info}/top_level.txt +0 -0
feldera/pipeline.py
CHANGED
|
@@ -1,29 +1,46 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import time
|
|
3
3
|
from datetime import datetime
|
|
4
|
+
import pathlib
|
|
4
5
|
|
|
5
6
|
import pandas
|
|
7
|
+
from uuid import UUID
|
|
6
8
|
|
|
7
9
|
from typing import List, Dict, Callable, Optional, Generator, Mapping, Any
|
|
10
|
+
from threading import Event
|
|
8
11
|
from collections import deque
|
|
9
|
-
from queue import Queue
|
|
10
12
|
|
|
11
13
|
from feldera.rest.errors import FelderaAPIError
|
|
12
|
-
from feldera.enums import
|
|
14
|
+
from feldera.enums import (
|
|
15
|
+
BootstrapPolicy,
|
|
16
|
+
CompletionTokenStatus,
|
|
17
|
+
PipelineFieldSelector,
|
|
18
|
+
PipelineStatus,
|
|
19
|
+
ProgramStatus,
|
|
20
|
+
CheckpointStatus,
|
|
21
|
+
TransactionStatus,
|
|
22
|
+
StorageStatus,
|
|
23
|
+
DeploymentDesiredStatus,
|
|
24
|
+
DeploymentResourcesDesiredStatus,
|
|
25
|
+
DeploymentResourcesStatus,
|
|
26
|
+
DeploymentRuntimeDesiredStatus,
|
|
27
|
+
DeploymentRuntimeStatus,
|
|
28
|
+
)
|
|
13
29
|
from feldera.rest.pipeline import Pipeline as InnerPipeline
|
|
14
30
|
from feldera.rest.feldera_client import FelderaClient
|
|
15
|
-
from feldera._callback_runner import
|
|
31
|
+
from feldera._callback_runner import CallbackRunner
|
|
16
32
|
from feldera.output_handler import OutputHandler
|
|
17
33
|
from feldera._helpers import ensure_dataframe_has_columns, chunk_dataframe
|
|
18
34
|
from feldera.rest.sql_table import SQLTable
|
|
19
35
|
from feldera.rest.sql_view import SQLView
|
|
36
|
+
from feldera.runtime_config import RuntimeConfig
|
|
37
|
+
from feldera.stats import PipelineStatistics
|
|
20
38
|
|
|
21
39
|
|
|
22
40
|
class Pipeline:
|
|
23
41
|
def __init__(self, client: FelderaClient):
|
|
24
42
|
self.client: FelderaClient = client
|
|
25
43
|
self._inner: InnerPipeline | None = None
|
|
26
|
-
self.views_tx: List[Dict[str, Queue]] = []
|
|
27
44
|
|
|
28
45
|
@staticmethod
|
|
29
46
|
def _from_inner(inner: InnerPipeline, client: FelderaClient) -> "Pipeline":
|
|
@@ -31,28 +48,16 @@ class Pipeline:
|
|
|
31
48
|
pipeline._inner = inner
|
|
32
49
|
return pipeline
|
|
33
50
|
|
|
34
|
-
def
|
|
35
|
-
"""
|
|
36
|
-
Internal function used to set up the output listeners.
|
|
37
|
-
|
|
38
|
-
:meta private:
|
|
39
|
-
"""
|
|
40
|
-
|
|
41
|
-
for view_queue in self.views_tx:
|
|
42
|
-
for view_name, queue in view_queue.items():
|
|
43
|
-
# sends a message to the callback runner to start listening
|
|
44
|
-
queue.put(_CallbackRunnerInstruction.PipelineStarted)
|
|
45
|
-
# block until the callback runner is ready
|
|
46
|
-
queue.join()
|
|
47
|
-
|
|
48
|
-
def refresh(self):
|
|
51
|
+
def refresh(self, field_selector: PipelineFieldSelector):
|
|
49
52
|
"""
|
|
50
53
|
Calls the backend to get the updated, latest version of the pipeline.
|
|
51
54
|
|
|
55
|
+
:param field_selector: Choose what pipeline information to refresh; see PipelineFieldSelector enum definition.
|
|
56
|
+
|
|
52
57
|
:raises FelderaConnectionError: If there is an issue connecting to the backend.
|
|
53
58
|
"""
|
|
54
59
|
|
|
55
|
-
self._inner = self.client.get_pipeline(self.name)
|
|
60
|
+
self._inner = self.client.get_pipeline(self.name, field_selector)
|
|
56
61
|
|
|
57
62
|
def status(self) -> PipelineStatus:
|
|
58
63
|
"""
|
|
@@ -60,7 +65,7 @@ class Pipeline:
|
|
|
60
65
|
"""
|
|
61
66
|
|
|
62
67
|
try:
|
|
63
|
-
self.refresh()
|
|
68
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
64
69
|
return PipelineStatus.from_str(self._inner.deployment_status)
|
|
65
70
|
|
|
66
71
|
except FelderaAPIError as err:
|
|
@@ -69,6 +74,40 @@ class Pipeline:
|
|
|
69
74
|
else:
|
|
70
75
|
raise err
|
|
71
76
|
|
|
77
|
+
def wait_for_status(
|
|
78
|
+
self, expected_status: PipelineStatus, timeout: Optional[int] = None
|
|
79
|
+
) -> None:
|
|
80
|
+
"""
|
|
81
|
+
Wait for the pipeline to reach the specified status.
|
|
82
|
+
|
|
83
|
+
:param expected_status: The status to wait for
|
|
84
|
+
:param timeout: Maximum time to wait in seconds. If None, waits forever (default: None)
|
|
85
|
+
:raises TimeoutError: If the expected status is not reached within the timeout
|
|
86
|
+
"""
|
|
87
|
+
start_time = time.time()
|
|
88
|
+
|
|
89
|
+
while True:
|
|
90
|
+
current_status = self.status()
|
|
91
|
+
if current_status == expected_status:
|
|
92
|
+
return
|
|
93
|
+
|
|
94
|
+
if timeout is not None and time.time() - start_time >= timeout:
|
|
95
|
+
raise TimeoutError(
|
|
96
|
+
f"Pipeline did not reach {expected_status.name} status within {timeout} seconds"
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
time.sleep(1)
|
|
100
|
+
|
|
101
|
+
def stats(self) -> PipelineStatistics:
|
|
102
|
+
"""Gets the pipeline metrics and performance counters."""
|
|
103
|
+
|
|
104
|
+
return PipelineStatistics.from_dict(self.client.get_pipeline_stats(self.name))
|
|
105
|
+
|
|
106
|
+
def logs(self) -> Generator[str, None, None]:
|
|
107
|
+
"""Gets the pipeline logs."""
|
|
108
|
+
|
|
109
|
+
return self.client.get_pipeline_logs(self.name)
|
|
110
|
+
|
|
72
111
|
def input_pandas(self, table_name: str, df: pandas.DataFrame, force: bool = False):
|
|
73
112
|
"""
|
|
74
113
|
Push all rows in a pandas DataFrame to the pipeline.
|
|
@@ -99,14 +138,13 @@ class Pipeline:
|
|
|
99
138
|
|
|
100
139
|
ensure_dataframe_has_columns(df)
|
|
101
140
|
|
|
102
|
-
pipeline = self.client.get_pipeline(self.name)
|
|
141
|
+
pipeline = self.client.get_pipeline(self.name, PipelineFieldSelector.ALL)
|
|
103
142
|
if table_name.lower() != "now" and table_name.lower() not in [
|
|
104
143
|
tbl.name.lower() for tbl in pipeline.tables
|
|
105
144
|
]:
|
|
106
145
|
raise ValueError(
|
|
107
|
-
f"Cannot push to table '{
|
|
108
|
-
|
|
109
|
-
}': table with this name does not exist in the '{self.name}' pipeline"
|
|
146
|
+
f"Cannot push to table '{table_name}': table with this name"
|
|
147
|
+
f" does not exist in the '{self.name}' pipeline"
|
|
110
148
|
)
|
|
111
149
|
else:
|
|
112
150
|
# consider validating the schema here
|
|
@@ -129,6 +167,7 @@ class Pipeline:
|
|
|
129
167
|
data: Dict | list,
|
|
130
168
|
update_format: str = "raw",
|
|
131
169
|
force: bool = False,
|
|
170
|
+
wait: bool = True,
|
|
132
171
|
):
|
|
133
172
|
"""
|
|
134
173
|
Push this JSON data to the specified table of the pipeline.
|
|
@@ -140,8 +179,9 @@ class Pipeline:
|
|
|
140
179
|
:param data: The JSON encoded data to be pushed to the pipeline. The data should be in the form:
|
|
141
180
|
`{'col1': 'val1', 'col2': 'val2'}` or `[{'col1': 'val1', 'col2': 'val2'}, {'col1': 'val1', 'col2': 'val2'}]`
|
|
142
181
|
:param update_format: The update format of the JSON data to be pushed to the pipeline. Must be one of:
|
|
143
|
-
"raw", "insert_delete".
|
|
182
|
+
"raw", "insert_delete". https://docs.feldera.com/formats/json#the-insertdelete-format
|
|
144
183
|
:param force: `True` to push data even if the pipeline is paused. `False` by default.
|
|
184
|
+
:param wait: If True, blocks until this input has been processed by the pipeline
|
|
145
185
|
|
|
146
186
|
:raises ValueError: If the update format is invalid.
|
|
147
187
|
:raises FelderaAPIError: If the pipeline is not in a valid state to push data.
|
|
@@ -164,6 +204,7 @@ class Pipeline:
|
|
|
164
204
|
update_format=update_format,
|
|
165
205
|
array=array,
|
|
166
206
|
force=force,
|
|
207
|
+
wait=wait,
|
|
167
208
|
)
|
|
168
209
|
|
|
169
210
|
def pause_connector(self, table_name: str, connector_name: str):
|
|
@@ -175,7 +216,7 @@ class Pipeline:
|
|
|
175
216
|
All connectors are RUNNING by default.
|
|
176
217
|
|
|
177
218
|
Refer to the connector documentation for more information:
|
|
178
|
-
|
|
219
|
+
https://docs.feldera.com/connectors/#input-connector-orchestration
|
|
179
220
|
|
|
180
221
|
:param table_name: The name of the table that the connector is attached to.
|
|
181
222
|
:param connector_name: The name of the connector to pause.
|
|
@@ -194,7 +235,7 @@ class Pipeline:
|
|
|
194
235
|
All connectors are RUNNING by default.
|
|
195
236
|
|
|
196
237
|
Refer to the connector documentation for more information:
|
|
197
|
-
|
|
238
|
+
https://docs.feldera.com/connectors/#input-connector-orchestration
|
|
198
239
|
|
|
199
240
|
:param table_name: The name of the table that the connector is attached to.
|
|
200
241
|
:param connector_name: The name of the connector to resume.
|
|
@@ -207,23 +248,21 @@ class Pipeline:
|
|
|
207
248
|
def listen(self, view_name: str) -> OutputHandler:
|
|
208
249
|
"""
|
|
209
250
|
Follow the change stream (i.e., the output) of the provided view.
|
|
210
|
-
Returns an output
|
|
251
|
+
Returns an output handle to read the changes.
|
|
211
252
|
|
|
212
|
-
When the pipeline is
|
|
253
|
+
When the pipeline is stopped, the handle is dropped.
|
|
213
254
|
|
|
214
|
-
|
|
215
|
-
|
|
255
|
+
The handle will only receive changes from the point in time when the listener is created.
|
|
256
|
+
In order to receive all changes since the pipeline started, you can create the pipeline in the `PAUSED` state
|
|
257
|
+
using :meth:`start_paused`, attach listeners and unpause the pipeline using :meth:`resume`.
|
|
216
258
|
|
|
217
259
|
:param view_name: The name of the view to listen to.
|
|
218
260
|
"""
|
|
219
261
|
|
|
220
|
-
queue: Optional[Queue] = None
|
|
221
|
-
|
|
222
262
|
if self.status() not in [PipelineStatus.PAUSED, PipelineStatus.RUNNING]:
|
|
223
|
-
|
|
224
|
-
self.views_tx.append({view_name: queue})
|
|
263
|
+
raise RuntimeError("Pipeline must be running or paused to listen to output")
|
|
225
264
|
|
|
226
|
-
handler = OutputHandler(self.client, self.name, view_name
|
|
265
|
+
handler = OutputHandler(self.client, self.name, view_name)
|
|
227
266
|
handler.start()
|
|
228
267
|
|
|
229
268
|
return handler
|
|
@@ -234,8 +273,9 @@ class Pipeline:
|
|
|
234
273
|
"""
|
|
235
274
|
Run the given callback on each chunk of the output of the specified view.
|
|
236
275
|
|
|
237
|
-
|
|
238
|
-
|
|
276
|
+
The callback will only receive changes from the point in time when the listener is created.
|
|
277
|
+
In order to receive all changes since the pipeline started, you can create the pipeline in the `PAUSED` state
|
|
278
|
+
using :meth:`start_paused`, attach listeners and unpause the pipeline using :meth:`resume`.
|
|
239
279
|
|
|
240
280
|
:param view_name: The name of the view.
|
|
241
281
|
:param callback: The callback to run on each chunk. The callback should take two arguments:
|
|
@@ -253,34 +293,37 @@ class Pipeline:
|
|
|
253
293
|
|
|
254
294
|
"""
|
|
255
295
|
|
|
256
|
-
queue: Optional[Queue] = None
|
|
257
|
-
|
|
258
296
|
if self.status() not in [PipelineStatus.RUNNING, PipelineStatus.PAUSED]:
|
|
259
|
-
|
|
260
|
-
self.views_tx.append({view_name: queue})
|
|
297
|
+
raise RuntimeError("Pipeline must be running or paused to listen to output")
|
|
261
298
|
|
|
262
|
-
|
|
299
|
+
event = Event()
|
|
300
|
+
handler = CallbackRunner(
|
|
301
|
+
self.client, self.name, view_name, callback, lambda exception: None, event
|
|
302
|
+
)
|
|
263
303
|
handler.start()
|
|
304
|
+
event.wait()
|
|
264
305
|
|
|
265
306
|
def wait_for_completion(
|
|
266
|
-
self,
|
|
307
|
+
self, force_stop: bool = False, timeout_s: float | None = None
|
|
267
308
|
):
|
|
268
309
|
"""
|
|
269
310
|
Block until the pipeline has completed processing all input records.
|
|
270
311
|
|
|
271
|
-
This method blocks until (1) all input connectors attached to the
|
|
272
|
-
have finished reading their input data sources and issued
|
|
273
|
-
notifications to the pipeline, and (2) all inputs received
|
|
274
|
-
connectors have been fully processed and corresponding
|
|
275
|
-
sent out through the output connectors.
|
|
312
|
+
This method blocks until (1) all input connectors attached to the
|
|
313
|
+
pipeline have finished reading their input data sources and issued
|
|
314
|
+
end-of-input notifications to the pipeline, and (2) all inputs received
|
|
315
|
+
from these connectors have been fully processed and corresponding
|
|
316
|
+
outputs have been sent out through the output connectors.
|
|
276
317
|
|
|
277
318
|
This method will block indefinitely if at least one of the input
|
|
278
319
|
connectors attached to the pipeline is a streaming connector, such as
|
|
279
320
|
Kafka, that does not issue the end-of-input notification.
|
|
280
321
|
|
|
281
|
-
:param
|
|
282
|
-
|
|
283
|
-
|
|
322
|
+
:param force_stop: If True, the pipeline will be forcibly stopped after
|
|
323
|
+
completion. False by default. No checkpoints will be made.
|
|
324
|
+
:param timeout_s: Optional. The maximum time (in seconds) to wait for
|
|
325
|
+
the pipeline to complete. The default is None, which means wait
|
|
326
|
+
indefinitely.
|
|
284
327
|
|
|
285
328
|
:raises RuntimeError: If the pipeline returns unknown metrics.
|
|
286
329
|
"""
|
|
@@ -289,6 +332,7 @@ class Pipeline:
|
|
|
289
332
|
PipelineStatus.RUNNING,
|
|
290
333
|
PipelineStatus.INITIALIZING,
|
|
291
334
|
PipelineStatus.PROVISIONING,
|
|
335
|
+
PipelineStatus.BOOTSTRAPPING,
|
|
292
336
|
]:
|
|
293
337
|
raise RuntimeError("Pipeline must be running to wait for completion")
|
|
294
338
|
|
|
@@ -299,99 +343,44 @@ class Pipeline:
|
|
|
299
343
|
elapsed = time.monotonic() - start_time
|
|
300
344
|
if elapsed > timeout_s:
|
|
301
345
|
raise TimeoutError(
|
|
302
|
-
f"timeout ({timeout_s}s) reached while waiting for
|
|
303
|
-
|
|
304
|
-
}' to complete"
|
|
346
|
+
f"timeout ({timeout_s}s) reached while waiting for"
|
|
347
|
+
f" pipeline '{self.name}' to complete"
|
|
305
348
|
)
|
|
306
349
|
logging.debug(
|
|
307
|
-
f"waiting for pipeline {self.name} to complete: elapsed
|
|
308
|
-
|
|
309
|
-
}s, timeout: {timeout_s}s"
|
|
350
|
+
f"waiting for pipeline {self.name} to complete: elapsed"
|
|
351
|
+
f" time {elapsed}s, timeout: {timeout_s}s"
|
|
310
352
|
)
|
|
311
353
|
|
|
312
|
-
|
|
313
|
-
"global_metrics"
|
|
314
|
-
)
|
|
315
|
-
pipeline_complete: bool = metrics.get("pipeline_complete")
|
|
316
|
-
|
|
354
|
+
pipeline_complete: bool = self.is_complete()
|
|
317
355
|
if pipeline_complete is None:
|
|
318
356
|
raise RuntimeError(
|
|
319
357
|
"received unknown metrics from the pipeline, pipeline_complete is None"
|
|
320
358
|
)
|
|
321
|
-
|
|
322
|
-
if pipeline_complete:
|
|
359
|
+
elif pipeline_complete:
|
|
323
360
|
break
|
|
324
361
|
|
|
325
362
|
time.sleep(1)
|
|
326
363
|
|
|
327
|
-
if
|
|
328
|
-
self.
|
|
329
|
-
|
|
330
|
-
def __failed_check(self, next):
|
|
331
|
-
"""
|
|
332
|
-
Checks if the pipeline is in FAILED state and raises an error if it is.
|
|
333
|
-
:meta private:
|
|
334
|
-
"""
|
|
335
|
-
status = self.status()
|
|
336
|
-
if status == PipelineStatus.FAILED:
|
|
337
|
-
deployment_error = self.client.get_pipeline(self.name).deployment_error
|
|
338
|
-
error_msg = deployment_error.get("message", "")
|
|
339
|
-
raise RuntimeError(
|
|
340
|
-
f"""Cannot {next} pipeline '{self.name}' in FAILED state.
|
|
341
|
-
The pipeline must be in SHUTDOWN state before it can be started, but it is currently in FAILED state.
|
|
342
|
-
Use `Pipeline.shutdown()` method to shut down the pipeline.
|
|
343
|
-
Error Message:
|
|
344
|
-
{error_msg}"""
|
|
345
|
-
)
|
|
346
|
-
|
|
347
|
-
def start(self, timeout_s: Optional[float] = None):
|
|
348
|
-
"""
|
|
349
|
-
.. _start:
|
|
350
|
-
|
|
351
|
-
Starts this pipeline.
|
|
352
|
-
|
|
353
|
-
The pipeline must be in SHUTDOWN state to start.
|
|
354
|
-
If the pipeline is in any other state, an error will be raised.
|
|
355
|
-
If the pipeline is in PAUSED state, use `.meth:resume` instead.
|
|
356
|
-
If the pipeline is in FAILED state, it must be shutdown before starting it again.
|
|
357
|
-
|
|
358
|
-
:param timeout_s: The maximum time (in seconds) to wait for the pipeline to start.
|
|
359
|
-
|
|
360
|
-
:raises RuntimeError: If the pipeline is not in SHUTDOWN state.
|
|
361
|
-
"""
|
|
362
|
-
|
|
363
|
-
self.__failed_check("start")
|
|
364
|
-
status = self.status()
|
|
365
|
-
if status != PipelineStatus.SHUTDOWN:
|
|
366
|
-
raise RuntimeError(
|
|
367
|
-
f"""Cannot start pipeline '{self.name}' in state '{str(status.name)}'.
|
|
368
|
-
The pipeline must be in SHUTDOWN state before it can be started.
|
|
369
|
-
You can either shut down the pipeline using the `Pipeline.shutdown()` method or use `Pipeline.resume()` to \
|
|
370
|
-
resume a paused pipeline."""
|
|
371
|
-
)
|
|
372
|
-
|
|
373
|
-
self.client.pause_pipeline(
|
|
374
|
-
self.name, "Unable to START the pipeline.", timeout_s
|
|
375
|
-
)
|
|
376
|
-
self.__setup_output_listeners()
|
|
377
|
-
self.resume(timeout_s)
|
|
364
|
+
if force_stop:
|
|
365
|
+
self.stop(force=True)
|
|
378
366
|
|
|
379
|
-
def
|
|
367
|
+
def is_complete(self) -> bool:
|
|
380
368
|
"""
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
This method **SHUTS DOWN** the pipeline regardless of its current state and then starts it again.
|
|
369
|
+
Check if the pipeline has completed processing all input records.
|
|
384
370
|
|
|
385
|
-
|
|
371
|
+
Returns True if (1) all input connectors attached to the
|
|
372
|
+
pipeline have finished reading their input data sources and issued
|
|
373
|
+
end-of-input notifications to the pipeline, and (2) all inputs received
|
|
374
|
+
from these connectors have been fully processed and corresponding
|
|
375
|
+
outputs have been sent out through the output connectors.
|
|
386
376
|
"""
|
|
387
377
|
|
|
388
|
-
self.
|
|
389
|
-
self.start(timeout_s)
|
|
378
|
+
return self.stats().global_metrics.pipeline_complete
|
|
390
379
|
|
|
391
380
|
def wait_for_idle(
|
|
392
381
|
self,
|
|
393
382
|
idle_interval_s: float = 5.0,
|
|
394
|
-
timeout_s: float =
|
|
383
|
+
timeout_s: float | None = None,
|
|
395
384
|
poll_interval_s: float = 0.2,
|
|
396
385
|
):
|
|
397
386
|
"""
|
|
@@ -411,17 +400,15 @@ resume a paused pipeline."""
|
|
|
411
400
|
:raises RuntimeError: If the metrics are missing or the timeout was
|
|
412
401
|
reached.
|
|
413
402
|
"""
|
|
414
|
-
if idle_interval_s > timeout_s:
|
|
403
|
+
if timeout_s is not None and idle_interval_s > timeout_s:
|
|
415
404
|
raise ValueError(
|
|
416
|
-
f"idle interval ({idle_interval_s}s) cannot be larger than
|
|
417
|
-
|
|
418
|
-
}s)"
|
|
405
|
+
f"idle interval ({idle_interval_s}s) cannot be larger than"
|
|
406
|
+
f" timeout ({timeout_s}s)"
|
|
419
407
|
)
|
|
420
|
-
if poll_interval_s > timeout_s:
|
|
408
|
+
if timeout_s is not None and poll_interval_s > timeout_s:
|
|
421
409
|
raise ValueError(
|
|
422
|
-
f"poll interval ({poll_interval_s}s) cannot be larger than
|
|
423
|
-
|
|
424
|
-
}s)"
|
|
410
|
+
f"poll interval ({poll_interval_s}s) cannot be larger than"
|
|
411
|
+
f" timeout ({timeout_s}s)"
|
|
425
412
|
)
|
|
426
413
|
if poll_interval_s > idle_interval_s:
|
|
427
414
|
raise ValueError(
|
|
@@ -436,18 +423,17 @@ resume a paused pipeline."""
|
|
|
436
423
|
now_s = time.monotonic()
|
|
437
424
|
|
|
438
425
|
# Metrics retrieval
|
|
439
|
-
metrics
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
total_input_records
|
|
443
|
-
total_processed_records: int | None = metrics.get("total_processed_records")
|
|
444
|
-
if total_input_records is None:
|
|
426
|
+
metrics = self.stats().global_metrics
|
|
427
|
+
total_input_records = metrics.total_input_records
|
|
428
|
+
total_processed_records = metrics.total_processed_records
|
|
429
|
+
if metrics.total_input_records is None:
|
|
445
430
|
raise RuntimeError(
|
|
446
431
|
"total_input_records is missing from the pipeline metrics"
|
|
447
432
|
)
|
|
448
|
-
if total_processed_records is None:
|
|
433
|
+
if metrics.total_processed_records is None:
|
|
449
434
|
raise RuntimeError(
|
|
450
|
-
"total_processed_records is missing from the pipeline
|
|
435
|
+
"""total_processed_records is missing from the pipeline \
|
|
436
|
+
metrics"""
|
|
451
437
|
)
|
|
452
438
|
|
|
453
439
|
# Idle check
|
|
@@ -465,80 +451,236 @@ resume a paused pipeline."""
|
|
|
465
451
|
return
|
|
466
452
|
|
|
467
453
|
# Timeout
|
|
468
|
-
if now_s - start_time_s >= timeout_s:
|
|
454
|
+
if timeout_s is not None and now_s - start_time_s >= timeout_s:
|
|
469
455
|
raise RuntimeError(f"waiting for idle reached timeout ({timeout_s}s)")
|
|
470
456
|
time.sleep(poll_interval_s)
|
|
471
457
|
|
|
472
|
-
def
|
|
458
|
+
def activate(
|
|
459
|
+
self, wait: bool = True, timeout_s: Optional[float] = None
|
|
460
|
+
) -> Optional[PipelineStatus]:
|
|
461
|
+
"""
|
|
462
|
+
Activates the pipeline when starting from STANDBY mode. Only applicable
|
|
463
|
+
when the pipeline is starting from a checkpoint in object store.
|
|
464
|
+
|
|
465
|
+
:param wait: Set True to wait for the pipeline to activate. True by
|
|
466
|
+
default
|
|
467
|
+
:param timeout_s: The maximum time (in seconds) to wait for the
|
|
468
|
+
pipeline to pause.
|
|
469
|
+
"""
|
|
470
|
+
|
|
471
|
+
return self.client.activate_pipeline(self.name, wait=wait, timeout_s=timeout_s)
|
|
472
|
+
|
|
473
|
+
def start(
|
|
474
|
+
self,
|
|
475
|
+
bootstrap_policy: Optional[BootstrapPolicy] = None,
|
|
476
|
+
wait: bool = True,
|
|
477
|
+
timeout_s: Optional[float] = None,
|
|
478
|
+
):
|
|
479
|
+
"""
|
|
480
|
+
.. _start:
|
|
481
|
+
|
|
482
|
+
Starts this pipeline.
|
|
483
|
+
|
|
484
|
+
- The pipeline must be in STOPPED state to start.
|
|
485
|
+
- If the pipeline is in any other state, an error will be raised.
|
|
486
|
+
- If the pipeline is in PAUSED state, use `.meth:resume` instead.
|
|
487
|
+
|
|
488
|
+
:param bootstrap_policy: The bootstrap policy to use.
|
|
489
|
+
:param timeout_s: The maximum time (in seconds) to wait for the
|
|
490
|
+
pipeline to start.
|
|
491
|
+
:param wait: Set True to wait for the pipeline to start. True by default
|
|
492
|
+
|
|
493
|
+
:raises RuntimeError: If the pipeline is not in STOPPED state.
|
|
494
|
+
"""
|
|
495
|
+
|
|
496
|
+
self.client.start_pipeline(
|
|
497
|
+
self.name, bootstrap_policy=bootstrap_policy, wait=wait, timeout_s=timeout_s
|
|
498
|
+
)
|
|
499
|
+
|
|
500
|
+
def start_paused(
|
|
501
|
+
self,
|
|
502
|
+
bootstrap_policy: Optional[BootstrapPolicy] = None,
|
|
503
|
+
wait: bool = True,
|
|
504
|
+
timeout_s: Optional[float] = None,
|
|
505
|
+
):
|
|
506
|
+
"""
|
|
507
|
+
Starts the pipeline in the paused state.
|
|
508
|
+
"""
|
|
509
|
+
|
|
510
|
+
return self.client.start_pipeline_as_paused(
|
|
511
|
+
self.name, bootstrap_policy=bootstrap_policy, wait=wait, timeout_s=timeout_s
|
|
512
|
+
)
|
|
513
|
+
|
|
514
|
+
def start_standby(
|
|
515
|
+
self,
|
|
516
|
+
bootstrap_policy: Optional[BootstrapPolicy] = None,
|
|
517
|
+
wait: bool = True,
|
|
518
|
+
timeout_s: Optional[float] = None,
|
|
519
|
+
):
|
|
520
|
+
"""
|
|
521
|
+
Starts the pipeline in the standby state.
|
|
522
|
+
"""
|
|
523
|
+
|
|
524
|
+
self.client.start_pipeline_as_standby(
|
|
525
|
+
self.name, bootstrap_policy=bootstrap_policy, wait=wait, timeout_s=timeout_s
|
|
526
|
+
)
|
|
527
|
+
|
|
528
|
+
def restart(
|
|
529
|
+
self,
|
|
530
|
+
bootstrap_policy: Optional[BootstrapPolicy] = None,
|
|
531
|
+
timeout_s: Optional[float] = None,
|
|
532
|
+
):
|
|
533
|
+
"""
|
|
534
|
+
Restarts the pipeline.
|
|
535
|
+
|
|
536
|
+
This method forcibly **STOPS** the pipeline regardless of its current
|
|
537
|
+
state and then starts it again. No checkpoints are made when stopping
|
|
538
|
+
the pipeline.
|
|
539
|
+
|
|
540
|
+
:param bootstrap_policy: The bootstrap policy to use.
|
|
541
|
+
:param timeout_s: The maximum time (in seconds) to wait for the
|
|
542
|
+
pipeline to restart.
|
|
543
|
+
"""
|
|
544
|
+
|
|
545
|
+
self.stop(force=True, timeout_s=timeout_s)
|
|
546
|
+
self.start(bootstrap_policy=bootstrap_policy, timeout_s=timeout_s)
|
|
547
|
+
|
|
548
|
+
def pause(self, wait: bool = True, timeout_s: Optional[float] = None):
|
|
473
549
|
"""
|
|
474
550
|
Pause the pipeline.
|
|
475
551
|
|
|
476
|
-
The pipeline can only transition to the PAUSED state from the RUNNING
|
|
477
|
-
If the pipeline is already paused, it will remain in the PAUSED
|
|
552
|
+
The pipeline can only transition to the PAUSED state from the RUNNING
|
|
553
|
+
state. If the pipeline is already paused, it will remain in the PAUSED
|
|
554
|
+
state.
|
|
555
|
+
|
|
556
|
+
:param wait: Set True to wait for the pipeline to pause. True by default
|
|
557
|
+
:param timeout_s: The maximum time (in seconds) to wait for the
|
|
558
|
+
pipeline to pause.
|
|
559
|
+
"""
|
|
478
560
|
|
|
479
|
-
|
|
561
|
+
self.client.pause_pipeline(self.name, wait=wait, timeout_s=timeout_s)
|
|
480
562
|
|
|
481
|
-
|
|
563
|
+
def stop(self, force: bool, wait: bool = True, timeout_s: Optional[float] = None):
|
|
482
564
|
"""
|
|
565
|
+
Stops the pipeline.
|
|
483
566
|
|
|
484
|
-
|
|
485
|
-
self.client.pause_pipeline(self.name, timeout_s=timeout_s)
|
|
567
|
+
Stops the pipeline regardless of its current state.
|
|
486
568
|
|
|
487
|
-
|
|
569
|
+
:param force: Set True to immediately scale compute resources to zero.
|
|
570
|
+
Set False to automatically checkpoint before stopping.
|
|
571
|
+
:param wait: Set True to gracefully shutdown listeners and wait for the
|
|
572
|
+
pipeline to stop. True by default.
|
|
573
|
+
:param timeout_s: The maximum time (in seconds) to wait for the
|
|
574
|
+
pipeline to stop.
|
|
488
575
|
"""
|
|
489
|
-
Shut down the pipeline.
|
|
490
576
|
|
|
491
|
-
|
|
577
|
+
self.client.stop_pipeline(
|
|
578
|
+
self.name, force=force, wait=wait, timeout_s=timeout_s
|
|
579
|
+
)
|
|
580
|
+
|
|
581
|
+
def approve(self):
|
|
582
|
+
"""
|
|
583
|
+
Approves the pipeline to proceed with bootstrapping.
|
|
492
584
|
|
|
493
|
-
|
|
585
|
+
This method is used when a pipeline has been started with
|
|
586
|
+
`bootstrap_policy=BootstrapPolicy.AWAIT_APPROVAL` and is currently in the
|
|
587
|
+
AWAITINGAPPROVAL state. The pipeline will wait for explicit user approval
|
|
588
|
+
before proceeding with the bootstrapping process.
|
|
494
589
|
"""
|
|
495
590
|
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
591
|
+
self.client.approve_pipeline(self.name)
|
|
592
|
+
|
|
593
|
+
def resume(self, wait: bool = True, timeout_s: Optional[float] = None):
|
|
594
|
+
"""
|
|
595
|
+
Resumes the pipeline from the PAUSED state. If the pipeline is already
|
|
596
|
+
running, it will remain in the RUNNING state.
|
|
597
|
+
|
|
598
|
+
:param wait: Set True to wait for the pipeline to resume. True by default
|
|
599
|
+
:param timeout_s: The maximum time (in seconds) to wait for the
|
|
600
|
+
pipeline to resume.
|
|
601
|
+
"""
|
|
602
|
+
|
|
603
|
+
self.client.resume_pipeline(self.name, wait=wait, timeout_s=timeout_s)
|
|
604
|
+
|
|
605
|
+
def start_transaction(self) -> int:
|
|
606
|
+
"""
|
|
607
|
+
Start a new transaction.
|
|
608
|
+
|
|
609
|
+
:return: Transaction ID.
|
|
610
|
+
|
|
611
|
+
:raises FelderaAPIError: If the pipeline fails to start a transaction, e.g., if the pipeline is not running or
|
|
612
|
+
there is already an active transaction.
|
|
613
|
+
"""
|
|
502
614
|
|
|
503
|
-
self.client.
|
|
615
|
+
return self.client.start_transaction(self.name)
|
|
504
616
|
|
|
505
|
-
def
|
|
617
|
+
def commit_transaction(
|
|
618
|
+
self,
|
|
619
|
+
transaction_id: Optional[int] = None,
|
|
620
|
+
wait: bool = True,
|
|
621
|
+
timeout_s: Optional[float] = None,
|
|
622
|
+
):
|
|
506
623
|
"""
|
|
507
|
-
|
|
624
|
+
Commit the currently active transaction.
|
|
625
|
+
|
|
626
|
+
:param transaction_id: If provided, the function verifies that the currently active transaction matches this ID.
|
|
627
|
+
If the active transaction ID does not match, the function raises an error.
|
|
508
628
|
|
|
509
|
-
:param
|
|
629
|
+
:param wait: If True, the function blocks until the transaction either commits successfully or the timeout is reached.
|
|
630
|
+
If False, the function initiates the commit and returns immediately without waiting for completion. The default value is True.
|
|
631
|
+
|
|
632
|
+
:param timeout_s: Maximum time (in seconds) to wait for the transaction to commit when `wait` is True.
|
|
633
|
+
If None, the function will wait indefinitely.
|
|
634
|
+
|
|
635
|
+
:raises RuntimeError: If there is currently no transaction in progress.
|
|
636
|
+
:raises ValueError: If the provided `transaction_id` does not match the current transaction.
|
|
637
|
+
:raises TimeoutError: If the transaction does not commit within the specified timeout (when `wait` is True).
|
|
638
|
+
:raises FelderaAPIError: If the pipeline fails to commit a transaction.
|
|
510
639
|
"""
|
|
511
640
|
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
queue.join()
|
|
641
|
+
self.client.commit_transaction(self.name, transaction_id, wait, timeout_s)
|
|
642
|
+
|
|
643
|
+
def transaction_status(self) -> TransactionStatus:
|
|
644
|
+
"""
|
|
645
|
+
Get pipeline's transaction handling status.
|
|
518
646
|
|
|
519
|
-
|
|
647
|
+
:return: Current transaction handling status of the pipeline.
|
|
520
648
|
|
|
521
|
-
|
|
649
|
+
:raises FelderaAPIError: If pipeline's status couldn't be read, e.g., because the pipeline is not currently running.
|
|
522
650
|
"""
|
|
523
|
-
Resumes the pipeline from the PAUSED state. If the pipeline is already running, it will remain in the RUNNING state.
|
|
524
651
|
|
|
525
|
-
|
|
652
|
+
return self.stats().global_metrics.transaction_status
|
|
526
653
|
|
|
527
|
-
|
|
654
|
+
def transaction_id(self) -> Optional[int]:
|
|
528
655
|
"""
|
|
656
|
+
Gets the ID of the currently active transaction or None if there is no active transaction.
|
|
529
657
|
|
|
530
|
-
|
|
531
|
-
|
|
658
|
+
:return: The ID of the transaction.
|
|
659
|
+
"""
|
|
660
|
+
|
|
661
|
+
transaction_id = self.stats().global_metrics.transaction_id
|
|
662
|
+
|
|
663
|
+
if transaction_id == 0:
|
|
664
|
+
return None
|
|
665
|
+
else:
|
|
666
|
+
return transaction_id
|
|
532
667
|
|
|
533
|
-
def delete(self):
|
|
668
|
+
def delete(self, clear_storage: bool = False):
|
|
534
669
|
"""
|
|
535
670
|
Deletes the pipeline.
|
|
536
671
|
|
|
537
|
-
The pipeline must be
|
|
672
|
+
The pipeline must be stopped, and the storage cleared before it can be
|
|
673
|
+
deleted.
|
|
538
674
|
|
|
539
|
-
:
|
|
675
|
+
:param clear_storage: True if the storage should be cleared before
|
|
676
|
+
deletion. False by default
|
|
677
|
+
|
|
678
|
+
:raises FelderaAPIError: If the pipeline is not in STOPPED state or the
|
|
679
|
+
storage is still bound.
|
|
540
680
|
"""
|
|
541
681
|
|
|
682
|
+
if clear_storage:
|
|
683
|
+
self.clear_storage()
|
|
542
684
|
self.client.delete_pipeline(self.name)
|
|
543
685
|
|
|
544
686
|
@staticmethod
|
|
@@ -551,21 +693,35 @@ resume a paused pipeline."""
|
|
|
551
693
|
"""
|
|
552
694
|
|
|
553
695
|
try:
|
|
554
|
-
inner = client.get_pipeline(name)
|
|
696
|
+
inner = client.get_pipeline(name, PipelineFieldSelector.ALL)
|
|
555
697
|
return Pipeline._from_inner(inner, client)
|
|
556
698
|
except FelderaAPIError as err:
|
|
557
699
|
if err.status_code == 404:
|
|
558
|
-
|
|
700
|
+
err.message = f"Pipeline with name {name} not found"
|
|
701
|
+
raise err
|
|
702
|
+
|
|
703
|
+
@staticmethod
|
|
704
|
+
def all(client: FelderaClient) -> List["Pipeline"]:
|
|
705
|
+
"""
|
|
706
|
+
Get all pipelines.
|
|
707
|
+
|
|
708
|
+
:param client: The FelderaClient instance.
|
|
709
|
+
:return: A list of Pipeline objects.
|
|
710
|
+
"""
|
|
711
|
+
|
|
712
|
+
return [Pipeline._from_inner(p, client) for p in client.pipelines()]
|
|
559
713
|
|
|
560
|
-
def checkpoint(self, wait: bool = False, timeout_s=
|
|
714
|
+
def checkpoint(self, wait: bool = False, timeout_s: Optional[float] = None) -> int:
|
|
561
715
|
"""
|
|
562
|
-
Checkpoints this pipeline
|
|
563
|
-
Fault Tolerance in Feldera: <https://docs.feldera.com/pipelines/fault-tolerance/>
|
|
716
|
+
Checkpoints this pipeline.
|
|
564
717
|
|
|
565
718
|
:param wait: If true, will block until the checkpoint completes.
|
|
566
|
-
:param timeout_s: The maximum time (in seconds) to wait for the
|
|
719
|
+
:param timeout_s: The maximum time (in seconds) to wait for the
|
|
720
|
+
checkpoint to complete.
|
|
567
721
|
|
|
568
|
-
:
|
|
722
|
+
:return: The checkpoint sequence number.
|
|
723
|
+
|
|
724
|
+
:raises FelderaAPIError: If enterprise features are not enabled.
|
|
569
725
|
"""
|
|
570
726
|
|
|
571
727
|
seq = self.client.checkpoint_pipeline(self.name)
|
|
@@ -577,20 +733,17 @@ resume a paused pipeline."""
|
|
|
577
733
|
|
|
578
734
|
while True:
|
|
579
735
|
elapsed = time.monotonic() - start
|
|
580
|
-
if elapsed > timeout_s:
|
|
736
|
+
if timeout_s is not None and elapsed > timeout_s:
|
|
581
737
|
raise TimeoutError(
|
|
582
|
-
f"timeout ({timeout_s}s) reached while waiting for
|
|
583
|
-
|
|
584
|
-
}' to make checkpoint '{seq}'"
|
|
738
|
+
f"""timeout ({timeout_s}s) reached while waiting for \
|
|
739
|
+
pipeline '{self.name}' to make checkpoint '{seq}'"""
|
|
585
740
|
)
|
|
586
741
|
status = self.checkpoint_status(seq)
|
|
587
742
|
if status == CheckpointStatus.InProgress:
|
|
588
743
|
time.sleep(0.1)
|
|
589
744
|
continue
|
|
590
745
|
|
|
591
|
-
return
|
|
592
|
-
|
|
593
|
-
return seq
|
|
746
|
+
return seq
|
|
594
747
|
|
|
595
748
|
def checkpoint_status(self, seq: int) -> CheckpointStatus:
|
|
596
749
|
"""
|
|
@@ -616,14 +769,19 @@ resume a paused pipeline."""
|
|
|
616
769
|
if seq < success:
|
|
617
770
|
return CheckpointStatus.Unknown
|
|
618
771
|
|
|
619
|
-
def sync_checkpoint(
|
|
772
|
+
def sync_checkpoint(
|
|
773
|
+
self, wait: bool = False, timeout_s: Optional[float] = None
|
|
774
|
+
) -> str:
|
|
620
775
|
"""
|
|
621
776
|
Syncs this checkpoint to object store.
|
|
622
777
|
|
|
623
|
-
:param wait: If true, will block until the checkpoint sync
|
|
624
|
-
|
|
778
|
+
:param wait: If true, will block until the checkpoint sync operation
|
|
779
|
+
completes.
|
|
780
|
+
:param timeout_s: The maximum time (in seconds) to wait for the
|
|
781
|
+
checkpoint to complete syncing.
|
|
625
782
|
|
|
626
783
|
:raises FelderaAPIError: If no checkpoints have been made.
|
|
784
|
+
:raises RuntimeError: If syncing the checkpoint fails.
|
|
627
785
|
"""
|
|
628
786
|
|
|
629
787
|
uuid = self.client.sync_checkpoint(self.name)
|
|
@@ -635,18 +793,22 @@ resume a paused pipeline."""
|
|
|
635
793
|
|
|
636
794
|
while True:
|
|
637
795
|
elapsed = time.monotonic() - start
|
|
638
|
-
if elapsed > timeout_s:
|
|
796
|
+
if timeout_s is not None and elapsed > timeout_s:
|
|
639
797
|
raise TimeoutError(
|
|
640
|
-
f"timeout ({timeout_s}s) reached while waiting for
|
|
641
|
-
|
|
642
|
-
}' to sync checkpoint '{uuid}'"
|
|
798
|
+
f"""timeout ({timeout_s}s) reached while waiting for \
|
|
799
|
+
pipeline '{self.name}' to sync checkpoint '{uuid}'"""
|
|
643
800
|
)
|
|
644
801
|
status = self.sync_checkpoint_status(uuid)
|
|
802
|
+
if status == CheckpointStatus.Failure:
|
|
803
|
+
raise RuntimeError(
|
|
804
|
+
f"failed to sync checkpoint '{uuid}': ", status.get_error()
|
|
805
|
+
)
|
|
806
|
+
|
|
645
807
|
if status in [CheckpointStatus.InProgress, CheckpointStatus.Unknown]:
|
|
646
808
|
time.sleep(0.1)
|
|
647
809
|
continue
|
|
648
810
|
|
|
649
|
-
|
|
811
|
+
break
|
|
650
812
|
|
|
651
813
|
return uuid
|
|
652
814
|
|
|
@@ -656,12 +818,17 @@ resume a paused pipeline."""
|
|
|
656
818
|
If the checkpoint is currently being synchronized, returns
|
|
657
819
|
`CheckpointStatus.Unknown`.
|
|
658
820
|
|
|
821
|
+
Failures are not raised as runtime errors and must be explicitly
|
|
822
|
+
checked.
|
|
823
|
+
|
|
659
824
|
:param uuid: The checkpoint uuid.
|
|
660
825
|
"""
|
|
661
826
|
|
|
662
827
|
resp = self.client.sync_checkpoint_status(self.name)
|
|
663
828
|
success = resp.get("success")
|
|
664
829
|
|
|
830
|
+
fail = resp.get("failure") or {}
|
|
831
|
+
|
|
665
832
|
if uuid == success:
|
|
666
833
|
return CheckpointStatus.Success
|
|
667
834
|
|
|
@@ -669,26 +836,35 @@ resume a paused pipeline."""
|
|
|
669
836
|
if uuid == fail.get("uuid"):
|
|
670
837
|
failure = CheckpointStatus.Failure
|
|
671
838
|
failure.error = fail.get("error", "")
|
|
839
|
+
logging.error(f"failed to sync checkpoint '{uuid}': {failure.error}")
|
|
672
840
|
return failure
|
|
673
841
|
|
|
842
|
+
if (success is None) or UUID(uuid) > UUID(success):
|
|
843
|
+
return CheckpointStatus.InProgress
|
|
844
|
+
|
|
674
845
|
return CheckpointStatus.Unknown
|
|
675
846
|
|
|
676
847
|
def query(self, query: str) -> Generator[Mapping[str, Any], None, None]:
|
|
677
848
|
"""
|
|
678
|
-
Executes an ad-hoc SQL query on this pipeline and returns a generator
|
|
679
|
-
|
|
680
|
-
|
|
849
|
+
Executes an ad-hoc SQL query on this pipeline and returns a generator
|
|
850
|
+
that yields the rows of the result as Python dictionaries. For
|
|
851
|
+
``INSERT`` and ``DELETE`` queries, consider using :meth:`.execute`
|
|
852
|
+
instead. All floating-point numbers are deserialized as Decimal objects
|
|
853
|
+
to avoid precision loss.
|
|
681
854
|
|
|
682
855
|
Note:
|
|
683
856
|
You can only ``SELECT`` from materialized tables and views.
|
|
684
857
|
|
|
685
858
|
Important:
|
|
686
|
-
This method is lazy. It returns a generator and is not evaluated
|
|
859
|
+
This method is lazy. It returns a generator and is not evaluated
|
|
860
|
+
until you consume the result.
|
|
687
861
|
|
|
688
862
|
:param query: The SQL query to be executed.
|
|
689
|
-
:return: A generator that yields the rows of the result as Python
|
|
863
|
+
:return: A generator that yields the rows of the result as Python
|
|
864
|
+
dictionaries.
|
|
690
865
|
|
|
691
|
-
:raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
|
|
866
|
+
:raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
|
|
867
|
+
state.
|
|
692
868
|
:raises FelderaAPIError: If querying a non materialized table or view.
|
|
693
869
|
:raises FelderaAPIError: If the query is invalid.
|
|
694
870
|
"""
|
|
@@ -697,8 +873,9 @@ resume a paused pipeline."""
|
|
|
697
873
|
|
|
698
874
|
def query_parquet(self, query: str, path: str):
|
|
699
875
|
"""
|
|
700
|
-
Executes an ad-hoc SQL query on this pipeline and saves the result to
|
|
701
|
-
If the extension isn't `parquet`,
|
|
876
|
+
Executes an ad-hoc SQL query on this pipeline and saves the result to
|
|
877
|
+
the specified path as a parquet file. If the extension isn't `parquet`,
|
|
878
|
+
it will be automatically appended to `path`.
|
|
702
879
|
|
|
703
880
|
Note:
|
|
704
881
|
You can only ``SELECT`` from materialized tables and views.
|
|
@@ -706,7 +883,8 @@ resume a paused pipeline."""
|
|
|
706
883
|
:param query: The SQL query to be executed.
|
|
707
884
|
:param path: The path of the parquet file.
|
|
708
885
|
|
|
709
|
-
:raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
|
|
886
|
+
:raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
|
|
887
|
+
state.
|
|
710
888
|
:raises FelderaAPIError: If querying a non materialized table or view.
|
|
711
889
|
:raises FelderaAPIError: If the query is invalid.
|
|
712
890
|
"""
|
|
@@ -715,37 +893,62 @@ resume a paused pipeline."""
|
|
|
715
893
|
|
|
716
894
|
def query_tabular(self, query: str) -> Generator[str, None, None]:
|
|
717
895
|
"""
|
|
718
|
-
Executes a SQL query on this pipeline and returns the result as a
|
|
896
|
+
Executes a SQL query on this pipeline and returns the result as a
|
|
897
|
+
formatted string.
|
|
719
898
|
|
|
720
899
|
Note:
|
|
721
900
|
You can only ``SELECT`` from materialized tables and views.
|
|
722
901
|
|
|
723
902
|
Important:
|
|
724
|
-
This method is lazy. It returns a generator and is not evaluated
|
|
903
|
+
This method is lazy. It returns a generator and is not evaluated
|
|
904
|
+
until you consume the result.
|
|
725
905
|
|
|
726
906
|
:param query: The SQL query to be executed.
|
|
727
|
-
:return: A generator that yields a string representing the query result
|
|
907
|
+
:return: A generator that yields a string representing the query result
|
|
908
|
+
in a human-readable, tabular format.
|
|
728
909
|
|
|
729
|
-
:raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
|
|
910
|
+
:raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
|
|
911
|
+
state.
|
|
730
912
|
:raises FelderaAPIError: If querying a non materialized table or view.
|
|
731
913
|
:raises FelderaAPIError: If the query is invalid.
|
|
732
914
|
"""
|
|
733
915
|
|
|
734
916
|
return self.client.query_as_text(self.name, query)
|
|
735
917
|
|
|
918
|
+
def query_hash(self, query: str):
|
|
919
|
+
"""
|
|
920
|
+
Executes an ad-hoc SQL query on this pipeline and returns the result
|
|
921
|
+
as a hash of the result set. This is useful for quickly checking
|
|
922
|
+
if the result set has changed without retrieving the entire result.
|
|
923
|
+
|
|
924
|
+
Note:
|
|
925
|
+
For a stable hash, the query must be deterministic which means
|
|
926
|
+
it should be sorted.
|
|
927
|
+
|
|
928
|
+
:param query: The SQL query to be executed.
|
|
929
|
+
|
|
930
|
+
:raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
|
|
931
|
+
state.
|
|
932
|
+
:raises FelderaAPIError: If querying a non materialized table or view.
|
|
933
|
+
:raises FelderaAPIError: If the query is invalid.
|
|
934
|
+
"""
|
|
935
|
+
return self.client.query_as_hash(self.name, query)
|
|
936
|
+
|
|
736
937
|
def execute(self, query: str):
|
|
737
938
|
"""
|
|
738
|
-
Executes an ad-hoc SQL query on the current pipeline, discarding its
|
|
739
|
-
Unlike the :meth:`.query` method which returns a generator for
|
|
740
|
-
this method processes the query
|
|
939
|
+
Executes an ad-hoc SQL query on the current pipeline, discarding its
|
|
940
|
+
result. Unlike the :meth:`.query` method which returns a generator for
|
|
941
|
+
retrieving query results lazily, this method processes the query
|
|
942
|
+
eagerly and fully before returning.
|
|
741
943
|
|
|
742
|
-
This method is suitable for SQL operations like ``INSERT`` and
|
|
743
|
-
|
|
744
|
-
|
|
944
|
+
This method is suitable for SQL operations like ``INSERT`` and
|
|
945
|
+
``DELETE``, where the user needs confirmation of successful query
|
|
946
|
+
execution, but does not require the query result. If the query fails,
|
|
947
|
+
an exception will be raised.
|
|
745
948
|
|
|
746
949
|
Important:
|
|
747
|
-
If you try to ``INSERT`` or ``DELETE`` data from a table while the
|
|
748
|
-
it will block until the pipeline is resumed.
|
|
950
|
+
If you try to ``INSERT`` or ``DELETE`` data from a table while the
|
|
951
|
+
pipeline is paused, it will block until the pipeline is resumed.
|
|
749
952
|
|
|
750
953
|
:param query: The SQL query to be executed.
|
|
751
954
|
|
|
@@ -756,6 +959,16 @@ resume a paused pipeline."""
|
|
|
756
959
|
gen = self.query_tabular(query)
|
|
757
960
|
deque(gen, maxlen=0)
|
|
758
961
|
|
|
962
|
+
def clear_storage(self):
|
|
963
|
+
"""
|
|
964
|
+
Clears the storage of the pipeline if it is currently in use.
|
|
965
|
+
This action cannot be canceled, and will delete all the pipeline
|
|
966
|
+
storage.
|
|
967
|
+
"""
|
|
968
|
+
|
|
969
|
+
if self.storage_status() == StorageStatus.INUSE:
|
|
970
|
+
self.client.clear_storage(self.name)
|
|
971
|
+
|
|
759
972
|
@property
|
|
760
973
|
def name(self) -> str:
|
|
761
974
|
"""
|
|
@@ -769,34 +982,124 @@ resume a paused pipeline."""
|
|
|
769
982
|
Return the program SQL code of the pipeline.
|
|
770
983
|
"""
|
|
771
984
|
|
|
772
|
-
self.refresh()
|
|
985
|
+
self.refresh(PipelineFieldSelector.ALL)
|
|
773
986
|
return self._inner.program_code
|
|
774
987
|
|
|
988
|
+
def modify(
|
|
989
|
+
self,
|
|
990
|
+
sql: Optional[str] = None,
|
|
991
|
+
udf_rust: Optional[str] = None,
|
|
992
|
+
udf_toml: Optional[str] = None,
|
|
993
|
+
program_config: Optional[Mapping[str, Any]] = None,
|
|
994
|
+
runtime_config: Optional[Mapping[str, Any]] = None,
|
|
995
|
+
description: Optional[str] = None,
|
|
996
|
+
):
|
|
997
|
+
"""
|
|
998
|
+
Modify the pipeline.
|
|
999
|
+
|
|
1000
|
+
Modify the values of pipeline attributes: SQL code, UDF Rust code,
|
|
1001
|
+
UDF Rust dependencies (TOML), program config, runtime config, and
|
|
1002
|
+
description. Only the provided attributes will be modified. Other
|
|
1003
|
+
attributes will remain unchanged.
|
|
1004
|
+
|
|
1005
|
+
The pipeline must be in the STOPPED state to be modified.
|
|
1006
|
+
|
|
1007
|
+
:raises FelderaAPIError: If the pipeline is not in a STOPPED state.
|
|
1008
|
+
"""
|
|
1009
|
+
|
|
1010
|
+
self.client.patch_pipeline(
|
|
1011
|
+
name=self._inner.name,
|
|
1012
|
+
sql=sql,
|
|
1013
|
+
udf_rust=udf_rust,
|
|
1014
|
+
udf_toml=udf_toml,
|
|
1015
|
+
program_config=program_config,
|
|
1016
|
+
runtime_config=runtime_config,
|
|
1017
|
+
description=description,
|
|
1018
|
+
)
|
|
1019
|
+
|
|
1020
|
+
def update_runtime(self):
|
|
1021
|
+
"""
|
|
1022
|
+
Recompile a pipeline with the Feldera runtime version included in the
|
|
1023
|
+
currently installed Feldera platform.
|
|
1024
|
+
|
|
1025
|
+
Use this endpoint after upgrading Feldera to rebuild pipelines that were
|
|
1026
|
+
compiled with older platform versions. In most cases, recompilation is not
|
|
1027
|
+
required—pipelines compiled with older versions will continue to run on the
|
|
1028
|
+
upgraded platform.
|
|
1029
|
+
|
|
1030
|
+
Situations where recompilation may be necessary:
|
|
1031
|
+
- To benefit from the latest bug fixes and performance optimizations.
|
|
1032
|
+
- When backward-incompatible changes are introduced in Feldera. In this case,
|
|
1033
|
+
attempting to start a pipeline compiled with an unsupported version will
|
|
1034
|
+
result in an error.
|
|
1035
|
+
|
|
1036
|
+
If the pipeline is already compiled with the current platform version,
|
|
1037
|
+
this operation is a no-op.
|
|
1038
|
+
|
|
1039
|
+
Note that recompiling the pipeline with a new platform version may change its
|
|
1040
|
+
query plan. If the modified pipeline is started from an existing checkpoint,
|
|
1041
|
+
it may require bootstrapping parts of its state from scratch. See Feldera
|
|
1042
|
+
documentation for details on the bootstrapping process.
|
|
1043
|
+
|
|
1044
|
+
:raises FelderaAPIError: If the pipeline is not in a STOPPED state.
|
|
1045
|
+
"""
|
|
1046
|
+
|
|
1047
|
+
self.client.update_pipeline_runtime(self._inner.name)
|
|
1048
|
+
|
|
1049
|
+
def storage_status(self) -> StorageStatus:
|
|
1050
|
+
"""
|
|
1051
|
+
Return the storage status of the pipeline.
|
|
1052
|
+
"""
|
|
1053
|
+
|
|
1054
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
1055
|
+
return StorageStatus.from_str(self._inner.storage_status)
|
|
1056
|
+
|
|
775
1057
|
def program_status(self) -> ProgramStatus:
|
|
776
1058
|
"""
|
|
777
1059
|
Return the program status of the pipeline.
|
|
778
1060
|
|
|
779
1061
|
Program status is the status of compilation of this SQL program.
|
|
780
|
-
We first compile the SQL program to Rust code, and then compile the
|
|
1062
|
+
We first compile the SQL program to Rust code, and then compile the
|
|
1063
|
+
Rust code to a binary.
|
|
781
1064
|
"""
|
|
782
1065
|
|
|
783
|
-
self.refresh()
|
|
1066
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
784
1067
|
return ProgramStatus.from_value(self._inner.program_status)
|
|
785
1068
|
|
|
1069
|
+
def testing_force_update_platform_version(self, platform_version: str):
|
|
1070
|
+
"""
|
|
1071
|
+
Used to simulate a pipeline compiled with a different platform version than the one currently in use.
|
|
1072
|
+
This is useful for testing platform upgrade behavior without actually upgrading Feldera.
|
|
1073
|
+
|
|
1074
|
+
This method is only available when Feldera runs with the "testing" unstable feature enabled.
|
|
1075
|
+
"""
|
|
1076
|
+
|
|
1077
|
+
self.client.testing_force_update_platform_version(
|
|
1078
|
+
name=self._inner.name, platform_version=platform_version
|
|
1079
|
+
)
|
|
1080
|
+
|
|
786
1081
|
def program_status_since(self) -> datetime:
|
|
787
1082
|
"""
|
|
788
1083
|
Return the timestamp when the current program status was set.
|
|
789
1084
|
"""
|
|
790
1085
|
|
|
791
|
-
self.refresh()
|
|
1086
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
792
1087
|
return datetime.fromisoformat(self._inner.program_status_since)
|
|
793
1088
|
|
|
1089
|
+
def platform_version(self) -> str:
|
|
1090
|
+
"""
|
|
1091
|
+
Return the Feldera platform with which the program was compiled.
|
|
1092
|
+
"""
|
|
1093
|
+
|
|
1094
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
1095
|
+
return self._inner.platform_version
|
|
1096
|
+
|
|
794
1097
|
def udf_rust(self) -> str:
|
|
795
1098
|
"""
|
|
796
1099
|
Return the Rust code for UDFs.
|
|
797
1100
|
"""
|
|
798
1101
|
|
|
799
|
-
self.refresh()
|
|
1102
|
+
self.refresh(PipelineFieldSelector.ALL)
|
|
800
1103
|
return self._inner.udf_rust
|
|
801
1104
|
|
|
802
1105
|
def udf_toml(self) -> str:
|
|
@@ -804,7 +1107,7 @@ resume a paused pipeline."""
|
|
|
804
1107
|
Return the Rust dependencies required by UDFs (in the TOML format).
|
|
805
1108
|
"""
|
|
806
1109
|
|
|
807
|
-
self.refresh()
|
|
1110
|
+
self.refresh(PipelineFieldSelector.ALL)
|
|
808
1111
|
return self._inner.udf_toml
|
|
809
1112
|
|
|
810
1113
|
def program_config(self) -> Mapping[str, Any]:
|
|
@@ -812,23 +1115,40 @@ resume a paused pipeline."""
|
|
|
812
1115
|
Return the program config of the pipeline.
|
|
813
1116
|
"""
|
|
814
1117
|
|
|
815
|
-
self.refresh()
|
|
1118
|
+
self.refresh(PipelineFieldSelector.ALL)
|
|
816
1119
|
return self._inner.program_config
|
|
817
1120
|
|
|
818
|
-
def runtime_config(self) ->
|
|
1121
|
+
def runtime_config(self) -> RuntimeConfig:
|
|
819
1122
|
"""
|
|
820
1123
|
Return the runtime config of the pipeline.
|
|
821
1124
|
"""
|
|
822
1125
|
|
|
823
|
-
self.refresh()
|
|
824
|
-
return self._inner.runtime_config
|
|
1126
|
+
self.refresh(PipelineFieldSelector.ALL)
|
|
1127
|
+
return RuntimeConfig.from_dict(self._inner.runtime_config)
|
|
1128
|
+
|
|
1129
|
+
def set_runtime_config(self, runtime_config: RuntimeConfig):
|
|
1130
|
+
"""Updates the runtime config of the pipeline. The pipeline
|
|
1131
|
+
must be stopped. Changing some pipeline configuration, such
|
|
1132
|
+
as the number of workers, requires storage to be cleared.
|
|
1133
|
+
|
|
1134
|
+
For example, to set 'min_batch_size_records' on a pipeline::
|
|
1135
|
+
|
|
1136
|
+
runtime_config = pipeline.runtime_config()
|
|
1137
|
+
runtime_config.min_batch_size_records = 500
|
|
1138
|
+
pipeline.set_runtime_config(runtime_config)
|
|
1139
|
+
|
|
1140
|
+
"""
|
|
1141
|
+
|
|
1142
|
+
self.client.patch_pipeline(
|
|
1143
|
+
name=self._inner.name, runtime_config=runtime_config.to_dict()
|
|
1144
|
+
)
|
|
825
1145
|
|
|
826
1146
|
def id(self) -> str:
|
|
827
1147
|
"""
|
|
828
1148
|
Return the ID of the pipeline.
|
|
829
1149
|
"""
|
|
830
1150
|
|
|
831
|
-
self.refresh()
|
|
1151
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
832
1152
|
return self._inner.id
|
|
833
1153
|
|
|
834
1154
|
def description(self) -> str:
|
|
@@ -836,7 +1156,7 @@ resume a paused pipeline."""
|
|
|
836
1156
|
Return the description of the pipeline.
|
|
837
1157
|
"""
|
|
838
1158
|
|
|
839
|
-
self.refresh()
|
|
1159
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
840
1160
|
return self._inner.description
|
|
841
1161
|
|
|
842
1162
|
def tables(self) -> List[SQLTable]:
|
|
@@ -844,7 +1164,7 @@ resume a paused pipeline."""
|
|
|
844
1164
|
Return the tables of the pipeline.
|
|
845
1165
|
"""
|
|
846
1166
|
|
|
847
|
-
self.refresh()
|
|
1167
|
+
self.refresh(PipelineFieldSelector.ALL)
|
|
848
1168
|
return self._inner.tables
|
|
849
1169
|
|
|
850
1170
|
def views(self) -> List[SQLView]:
|
|
@@ -852,7 +1172,7 @@ resume a paused pipeline."""
|
|
|
852
1172
|
Return the views of the pipeline.
|
|
853
1173
|
"""
|
|
854
1174
|
|
|
855
|
-
self.refresh()
|
|
1175
|
+
self.refresh(PipelineFieldSelector.ALL)
|
|
856
1176
|
return self._inner.views
|
|
857
1177
|
|
|
858
1178
|
def created_at(self) -> datetime:
|
|
@@ -860,7 +1180,7 @@ resume a paused pipeline."""
|
|
|
860
1180
|
Return the creation time of the pipeline.
|
|
861
1181
|
"""
|
|
862
1182
|
|
|
863
|
-
self.refresh()
|
|
1183
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
864
1184
|
return datetime.fromisoformat(self._inner.created_at)
|
|
865
1185
|
|
|
866
1186
|
def version(self) -> int:
|
|
@@ -868,7 +1188,7 @@ resume a paused pipeline."""
|
|
|
868
1188
|
Return the version of the pipeline.
|
|
869
1189
|
"""
|
|
870
1190
|
|
|
871
|
-
self.refresh()
|
|
1191
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
872
1192
|
return self._inner.version
|
|
873
1193
|
|
|
874
1194
|
def program_version(self) -> int:
|
|
@@ -876,15 +1196,16 @@ resume a paused pipeline."""
|
|
|
876
1196
|
Return the program version of the pipeline.
|
|
877
1197
|
"""
|
|
878
1198
|
|
|
879
|
-
self.refresh()
|
|
1199
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
880
1200
|
return self._inner.program_version
|
|
881
1201
|
|
|
882
1202
|
def deployment_status_since(self) -> datetime:
|
|
883
1203
|
"""
|
|
884
|
-
Return the timestamp when the current deployment status of the pipeline
|
|
1204
|
+
Return the timestamp when the current deployment status of the pipeline
|
|
1205
|
+
was set.
|
|
885
1206
|
"""
|
|
886
1207
|
|
|
887
|
-
self.refresh()
|
|
1208
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
888
1209
|
return datetime.fromisoformat(self._inner.deployment_status_since)
|
|
889
1210
|
|
|
890
1211
|
def deployment_config(self) -> Mapping[str, Any]:
|
|
@@ -892,17 +1213,63 @@ resume a paused pipeline."""
|
|
|
892
1213
|
Return the deployment config of the pipeline.
|
|
893
1214
|
"""
|
|
894
1215
|
|
|
895
|
-
self.refresh()
|
|
1216
|
+
self.refresh(PipelineFieldSelector.ALL)
|
|
896
1217
|
return self._inner.deployment_config
|
|
897
1218
|
|
|
898
|
-
def deployment_desired_status(self) ->
|
|
1219
|
+
def deployment_desired_status(self) -> DeploymentDesiredStatus:
|
|
899
1220
|
"""
|
|
900
1221
|
Return the desired deployment status of the pipeline.
|
|
901
1222
|
This is the next state that the pipeline should transition to.
|
|
902
1223
|
"""
|
|
903
1224
|
|
|
904
|
-
self.refresh()
|
|
905
|
-
return
|
|
1225
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
1226
|
+
return DeploymentDesiredStatus.from_str(self._inner.deployment_desired_status)
|
|
1227
|
+
|
|
1228
|
+
def deployment_resources_desired_status(self) -> DeploymentResourcesDesiredStatus:
|
|
1229
|
+
"""
|
|
1230
|
+
Return the desired status of the deployment resources.
|
|
1231
|
+
"""
|
|
1232
|
+
|
|
1233
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
1234
|
+
return DeploymentResourcesDesiredStatus.from_str(
|
|
1235
|
+
self._inner.deployment_resources_desired_status
|
|
1236
|
+
)
|
|
1237
|
+
|
|
1238
|
+
def deployment_resources_status(self) -> DeploymentResourcesStatus:
|
|
1239
|
+
"""
|
|
1240
|
+
Return the status of the deployment resources.
|
|
1241
|
+
"""
|
|
1242
|
+
|
|
1243
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
1244
|
+
return DeploymentResourcesStatus.from_str(
|
|
1245
|
+
self._inner.deployment_resources_status
|
|
1246
|
+
)
|
|
1247
|
+
|
|
1248
|
+
def deployment_runtime_desired_status(self) -> DeploymentRuntimeDesiredStatus:
|
|
1249
|
+
"""
|
|
1250
|
+
Return the deployment runtime desired status.
|
|
1251
|
+
"""
|
|
1252
|
+
|
|
1253
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
1254
|
+
return DeploymentRuntimeDesiredStatus.from_str(
|
|
1255
|
+
self._inner.deployment_runtime_desired_status
|
|
1256
|
+
)
|
|
1257
|
+
|
|
1258
|
+
def deployment_runtime_status(self) -> DeploymentRuntimeStatus:
|
|
1259
|
+
"""
|
|
1260
|
+
Return the deployment runtime status.
|
|
1261
|
+
"""
|
|
1262
|
+
|
|
1263
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
1264
|
+
return DeploymentRuntimeStatus.from_str(self._inner.deployment_runtime_status)
|
|
1265
|
+
|
|
1266
|
+
def deployment_runtime_status_details(self) -> Optional[dict]:
|
|
1267
|
+
"""
|
|
1268
|
+
Return the deployment runtime status details.
|
|
1269
|
+
"""
|
|
1270
|
+
|
|
1271
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
1272
|
+
return self._inner.deployment_runtime_status_details
|
|
906
1273
|
|
|
907
1274
|
def deployment_error(self) -> Mapping[str, Any]:
|
|
908
1275
|
"""
|
|
@@ -910,43 +1277,38 @@ resume a paused pipeline."""
|
|
|
910
1277
|
Returns an empty string if there is no error.
|
|
911
1278
|
"""
|
|
912
1279
|
|
|
913
|
-
self.refresh()
|
|
1280
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
914
1281
|
return self._inner.deployment_error
|
|
915
1282
|
|
|
916
1283
|
def deployment_location(self) -> str:
|
|
917
1284
|
"""
|
|
918
1285
|
Return the deployment location of the pipeline.
|
|
919
|
-
Deployment location is the location where the pipeline can be reached
|
|
1286
|
+
Deployment location is the location where the pipeline can be reached
|
|
1287
|
+
at runtime (a TCP port number or a URI).
|
|
920
1288
|
"""
|
|
921
1289
|
|
|
922
|
-
self.refresh()
|
|
1290
|
+
self.refresh(PipelineFieldSelector.STATUS)
|
|
923
1291
|
return self._inner.deployment_location
|
|
924
1292
|
|
|
925
|
-
def program_binary_url(self) -> str:
|
|
926
|
-
"""
|
|
927
|
-
Return the program binary URL of the pipeline.
|
|
928
|
-
This is the URL where the compiled program binary can be downloaded from.
|
|
929
|
-
"""
|
|
930
|
-
|
|
931
|
-
self.refresh()
|
|
932
|
-
return self._inner.program_binary_url
|
|
933
|
-
|
|
934
1293
|
def program_info(self) -> Mapping[str, Any]:
|
|
935
1294
|
"""
|
|
936
1295
|
Return the program info of the pipeline.
|
|
937
|
-
This is the output returned by the SQL compiler, including: the list of
|
|
1296
|
+
This is the output returned by the SQL compiler, including: the list of
|
|
1297
|
+
input and output connectors, the generated Rust code for the pipeline,
|
|
1298
|
+
and the SQL program schema.
|
|
938
1299
|
"""
|
|
939
1300
|
|
|
940
|
-
self.refresh()
|
|
1301
|
+
self.refresh(PipelineFieldSelector.ALL)
|
|
941
1302
|
return self._inner.program_info
|
|
942
1303
|
|
|
943
1304
|
def program_error(self) -> Mapping[str, Any]:
|
|
944
1305
|
"""
|
|
945
1306
|
Return the program error of the pipeline.
|
|
946
|
-
If there are no errors, the `exit_code` field inside both
|
|
1307
|
+
If there are no errors, the `exit_code` field inside both
|
|
1308
|
+
`sql_compilation` and `rust_compilation` will be 0.
|
|
947
1309
|
"""
|
|
948
1310
|
|
|
949
|
-
self.refresh()
|
|
1311
|
+
self.refresh(PipelineFieldSelector.ALL)
|
|
950
1312
|
return self._inner.program_error
|
|
951
1313
|
|
|
952
1314
|
def errors(self) -> List[Mapping[str, Any]]:
|
|
@@ -963,3 +1325,98 @@ resume a paused pipeline."""
|
|
|
963
1325
|
if derr:
|
|
964
1326
|
errors.append(derr)
|
|
965
1327
|
return errors
|
|
1328
|
+
|
|
1329
|
+
def support_bundle(
|
|
1330
|
+
self,
|
|
1331
|
+
output_path: Optional[str] = None,
|
|
1332
|
+
*,
|
|
1333
|
+
circuit_profile: bool = True,
|
|
1334
|
+
heap_profile: bool = True,
|
|
1335
|
+
metrics: bool = True,
|
|
1336
|
+
logs: bool = True,
|
|
1337
|
+
stats: bool = True,
|
|
1338
|
+
pipeline_config: bool = True,
|
|
1339
|
+
system_config: bool = True,
|
|
1340
|
+
) -> bytes:
|
|
1341
|
+
"""
|
|
1342
|
+
Generate a support bundle containing diagnostic information from this pipeline.
|
|
1343
|
+
|
|
1344
|
+
This method collects various diagnostic data from the pipeline including
|
|
1345
|
+
circuit profile, heap profile, metrics, logs, stats, and connector statistics,
|
|
1346
|
+
and packages them into a single ZIP file for support purposes.
|
|
1347
|
+
|
|
1348
|
+
:param output_path: Optional path to save the support bundle file. If None,
|
|
1349
|
+
the support bundle is only returned as bytes.
|
|
1350
|
+
:param circuit_profile: Whether to collect circuit profile data (default: True)
|
|
1351
|
+
:param heap_profile: Whether to collect heap profile data (default: True)
|
|
1352
|
+
:param metrics: Whether to collect metrics data (default: True)
|
|
1353
|
+
:param logs: Whether to collect logs data (default: True)
|
|
1354
|
+
:param stats: Whether to collect stats data (default: True)
|
|
1355
|
+
:param pipeline_config: Whether to collect pipeline configuration data (default: True)
|
|
1356
|
+
:param system_config: Whether to collect system configuration data (default: True)
|
|
1357
|
+
:return: The support bundle as bytes (ZIP archive)
|
|
1358
|
+
:raises FelderaAPIError: If the pipeline does not exist or if there's an error
|
|
1359
|
+
"""
|
|
1360
|
+
|
|
1361
|
+
# Build query parameters
|
|
1362
|
+
params = {}
|
|
1363
|
+
if not circuit_profile:
|
|
1364
|
+
params["circuit_profile"] = "false"
|
|
1365
|
+
if not heap_profile:
|
|
1366
|
+
params["heap_profile"] = "false"
|
|
1367
|
+
if not metrics:
|
|
1368
|
+
params["metrics"] = "false"
|
|
1369
|
+
if not logs:
|
|
1370
|
+
params["logs"] = "false"
|
|
1371
|
+
if not stats:
|
|
1372
|
+
params["stats"] = "false"
|
|
1373
|
+
if not pipeline_config:
|
|
1374
|
+
params["pipeline_config"] = "false"
|
|
1375
|
+
if not system_config:
|
|
1376
|
+
params["system_config"] = "false"
|
|
1377
|
+
|
|
1378
|
+
support_bundle_bytes = self.client.get_pipeline_support_bundle(
|
|
1379
|
+
self.name, params=params
|
|
1380
|
+
)
|
|
1381
|
+
|
|
1382
|
+
if output_path is not None:
|
|
1383
|
+
path = pathlib.Path(output_path)
|
|
1384
|
+
|
|
1385
|
+
# Ensure the file has .zip extension
|
|
1386
|
+
if path.suffix != ".zip":
|
|
1387
|
+
path = path.with_suffix(".zip")
|
|
1388
|
+
|
|
1389
|
+
with open(path, "wb") as f:
|
|
1390
|
+
f.write(support_bundle_bytes)
|
|
1391
|
+
|
|
1392
|
+
print(f"Support bundle written to {path}")
|
|
1393
|
+
|
|
1394
|
+
return support_bundle_bytes
|
|
1395
|
+
|
|
1396
|
+
def generate_completion_token(self, table_name: str, connector_name: str) -> str:
|
|
1397
|
+
"""
|
|
1398
|
+
Returns a completion token that can be passed to :meth:`.Pipeline.completion_token_status` to
|
|
1399
|
+
check whether the pipeline has finished processing all inputs received from the connector before
|
|
1400
|
+
the token was generated.
|
|
1401
|
+
"""
|
|
1402
|
+
|
|
1403
|
+
return self.client.generate_completion_token(
|
|
1404
|
+
self.name, table_name, connector_name
|
|
1405
|
+
)
|
|
1406
|
+
|
|
1407
|
+
def completion_token_status(self, token: str) -> CompletionTokenStatus:
|
|
1408
|
+
"""
|
|
1409
|
+
Returns the status of the completion token.
|
|
1410
|
+
"""
|
|
1411
|
+
|
|
1412
|
+
if self.client.completion_token_processed(self.name, token):
|
|
1413
|
+
return CompletionTokenStatus.COMPLETE
|
|
1414
|
+
else:
|
|
1415
|
+
return CompletionTokenStatus.IN_PROGRESS
|
|
1416
|
+
|
|
1417
|
+
def wait_for_token(self, token: str):
|
|
1418
|
+
"""
|
|
1419
|
+
Blocks until the pipeline processes all inputs represented by the completion token.
|
|
1420
|
+
"""
|
|
1421
|
+
|
|
1422
|
+
self.client.wait_for_token(self.name, token)
|