feldera 0.69.0__py3-none-any.whl → 0.189.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of feldera might be problematic.

feldera/pipeline.py CHANGED
@@ -1,29 +1,46 @@
 import logging
 import time
 from datetime import datetime
+import pathlib
 
 import pandas
+from uuid import UUID
 
 from typing import List, Dict, Callable, Optional, Generator, Mapping, Any
+from threading import Event
 from collections import deque
-from queue import Queue
 
 from feldera.rest.errors import FelderaAPIError
-from feldera.enums import PipelineStatus, ProgramStatus, CheckpointStatus
+from feldera.enums import (
+    BootstrapPolicy,
+    CompletionTokenStatus,
+    PipelineFieldSelector,
+    PipelineStatus,
+    ProgramStatus,
+    CheckpointStatus,
+    TransactionStatus,
+    StorageStatus,
+    DeploymentDesiredStatus,
+    DeploymentResourcesDesiredStatus,
+    DeploymentResourcesStatus,
+    DeploymentRuntimeDesiredStatus,
+    DeploymentRuntimeStatus,
+)
 from feldera.rest.pipeline import Pipeline as InnerPipeline
 from feldera.rest.feldera_client import FelderaClient
-from feldera._callback_runner import _CallbackRunnerInstruction, CallbackRunner
+from feldera._callback_runner import CallbackRunner
 from feldera.output_handler import OutputHandler
 from feldera._helpers import ensure_dataframe_has_columns, chunk_dataframe
 from feldera.rest.sql_table import SQLTable
 from feldera.rest.sql_view import SQLView
+from feldera.runtime_config import RuntimeConfig
+from feldera.stats import PipelineStatistics
 
 
 class Pipeline:
     def __init__(self, client: FelderaClient):
         self.client: FelderaClient = client
         self._inner: InnerPipeline | None = None
-        self.views_tx: List[Dict[str, Queue]] = []
 
     @staticmethod
     def _from_inner(inner: InnerPipeline, client: FelderaClient) -> "Pipeline":
@@ -31,28 +48,16 @@ class Pipeline:
         pipeline._inner = inner
         return pipeline
 
-    def __setup_output_listeners(self):
-        """
-        Internal function used to set up the output listeners.
-
-        :meta private:
-        """
-
-        for view_queue in self.views_tx:
-            for view_name, queue in view_queue.items():
-                # sends a message to the callback runner to start listening
-                queue.put(_CallbackRunnerInstruction.PipelineStarted)
-                # block until the callback runner is ready
-                queue.join()
-
-    def refresh(self):
+    def refresh(self, field_selector: PipelineFieldSelector):
         """
         Calls the backend to get the updated, latest version of the pipeline.
 
+        :param field_selector: Choose what pipeline information to refresh; see the PipelineFieldSelector enum definition.
+
         :raises FelderaConnectionError: If there is an issue connecting to the backend.
         """
 
-        self._inner = self.client.get_pipeline(self.name)
+        self._inner = self.client.get_pipeline(self.name, field_selector)
 
     def status(self) -> PipelineStatus:
         """
@@ -60,7 +65,7 @@ class Pipeline:
         """
 
         try:
-            self.refresh()
+            self.refresh(PipelineFieldSelector.STATUS)
             return PipelineStatus.from_str(self._inner.deployment_status)
 
         except FelderaAPIError as err:
@@ -69,6 +74,40 @@ class Pipeline:
         else:
             raise err
 
+    def wait_for_status(
+        self, expected_status: PipelineStatus, timeout: Optional[int] = None
+    ) -> None:
+        """
+        Wait for the pipeline to reach the specified status.
+
+        :param expected_status: The status to wait for
+        :param timeout: Maximum time to wait in seconds. If None, waits forever (default: None)
+        :raises TimeoutError: If the expected status is not reached within the timeout
+        """
+        start_time = time.time()
+
+        while True:
+            current_status = self.status()
+            if current_status == expected_status:
+                return
+
+            if timeout is not None and time.time() - start_time >= timeout:
+                raise TimeoutError(
+                    f"Pipeline did not reach {expected_status.name} status within {timeout} seconds"
+                )
+
+            time.sleep(1)
+
+    def stats(self) -> PipelineStatistics:
+        """Gets the pipeline metrics and performance counters."""
+
+        return PipelineStatistics.from_dict(self.client.get_pipeline_stats(self.name))
+
+    def logs(self) -> Generator[str, None, None]:
+        """Gets the pipeline logs."""
+
+        return self.client.get_pipeline_logs(self.name)
+
     def input_pandas(self, table_name: str, df: pandas.DataFrame, force: bool = False):
         """
         Push all rows in a pandas DataFrame to the pipeline.
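
A minimal usage sketch of the `wait_for_status`, `stats`, and `logs` helpers added in this hunk. The client URL and pipeline name are illustrative assumptions, and `Pipeline.get` is inferred from the static constructor shown later in this diff:

    from feldera import FelderaClient, Pipeline
    from feldera.enums import PipelineStatus

    client = FelderaClient("http://localhost:8080")  # assumed local Feldera instance
    pipeline = Pipeline.get("demo", client)          # "demo" is a hypothetical pipeline name

    pipeline.start()
    pipeline.wait_for_status(PipelineStatus.RUNNING, timeout=60)
    print(pipeline.stats().global_metrics.total_processed_records)
    for line in pipeline.logs():                     # lazy generator of log lines
        print(line)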
@@ -99,14 +138,13 @@ class Pipeline:
 
         ensure_dataframe_has_columns(df)
 
-        pipeline = self.client.get_pipeline(self.name)
+        pipeline = self.client.get_pipeline(self.name, PipelineFieldSelector.ALL)
         if table_name.lower() != "now" and table_name.lower() not in [
             tbl.name.lower() for tbl in pipeline.tables
         ]:
             raise ValueError(
-                f"Cannot push to table '{
-                    table_name
-                }': table with this name does not exist in the '{self.name}' pipeline"
+                f"Cannot push to table '{table_name}': table with this name"
+                f" does not exist in the '{self.name}' pipeline"
             )
         else:
             # consider validating the schema here
@@ -129,6 +167,7 @@ class Pipeline:
         data: Dict | list,
         update_format: str = "raw",
         force: bool = False,
+        wait: bool = True,
     ):
         """
         Push this JSON data to the specified table of the pipeline.
@@ -140,8 +179,9 @@ class Pipeline:
         :param data: The JSON encoded data to be pushed to the pipeline. The data should be in the form:
             `{'col1': 'val1', 'col2': 'val2'}` or `[{'col1': 'val1', 'col2': 'val2'}, {'col1': 'val1', 'col2': 'val2'}]`
         :param update_format: The update format of the JSON data to be pushed to the pipeline. Must be one of:
-            "raw", "insert_delete". <https://docs.feldera.com/formats/json#the-insertdelete-format>
+            "raw", "insert_delete". https://docs.feldera.com/formats/json#the-insertdelete-format
         :param force: `True` to push data even if the pipeline is paused. `False` by default.
+        :param wait: If True, blocks until this input has been processed by the pipeline
 
         :raises ValueError: If the update format is invalid.
         :raises FelderaAPIError: If the pipeline is not in a valid state to push data.
@@ -164,6 +204,7 @@ class Pipeline:
             update_format=update_format,
             array=array,
             force=force,
+            wait=wait,
         )
 
     def pause_connector(self, table_name: str, connector_name: str):
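
The new `wait` flag on `input_json` defaults to blocking until the input is processed; a sketch with a hypothetical `events` table, reusing the `pipeline` handle from the earlier sketch:

    # Default: block until the pipeline has processed this input.
    pipeline.input_json("events", {"id": 1, "value": 10.5})

    # Fire-and-forget: enqueue the data and return immediately.
    pipeline.input_json("events", [{"id": 2, "value": 3.25}], wait=False)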
@@ -175,7 +216,7 @@ class Pipeline:
         All connectors are RUNNING by default.
 
         Refer to the connector documentation for more information:
-        <https://docs.feldera.com/connectors/#input-connector-orchestration>
+        https://docs.feldera.com/connectors/#input-connector-orchestration
 
         :param table_name: The name of the table that the connector is attached to.
         :param connector_name: The name of the connector to pause.
@@ -194,7 +235,7 @@ class Pipeline:
         All connectors are RUNNING by default.
 
         Refer to the connector documentation for more information:
-        <https://docs.feldera.com/connectors/#input-connector-orchestration>
+        https://docs.feldera.com/connectors/#input-connector-orchestration
 
         :param table_name: The name of the table that the connector is attached to.
         :param connector_name: The name of the connector to resume.
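
Connector orchestration sketch for the two methods above (table and connector names are hypothetical):

    # Stop ingesting from one connector while the rest of the pipeline keeps running.
    pipeline.pause_connector("events", "kafka_events")
    # ... perform maintenance on the upstream source ...
    pipeline.resume_connector("events", "kafka_events")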
@@ -207,23 +248,21 @@ class Pipeline:
     def listen(self, view_name: str) -> OutputHandler:
         """
         Follow the change stream (i.e., the output) of the provided view.
-        Returns an output handler to read the changes.
+        Returns an output handle to read the changes.
 
-        When the pipeline is shutdown, these listeners are dropped.
+        When the pipeline is stopped, the handle is dropped.
 
-        You must call this method before starting the pipeline to get the entire output of the view.
-        If this method is called once the pipeline has started, you will only get the output from that point onwards.
+        The handle will only receive changes from the point in time when the listener is created.
+        In order to receive all changes since the pipeline started, you can create the pipeline in the `PAUSED` state
+        using :meth:`start_paused`, attach listeners and unpause the pipeline using :meth:`resume`.
 
         :param view_name: The name of the view to listen to.
         """
 
-        queue: Optional[Queue] = None
-
         if self.status() not in [PipelineStatus.PAUSED, PipelineStatus.RUNNING]:
-            queue = Queue(maxsize=1)
-            self.views_tx.append({view_name: queue})
+            raise RuntimeError("Pipeline must be running or paused to listen to output")
 
-        handler = OutputHandler(self.client, self.name, view_name, queue)
+        handler = OutputHandler(self.client, self.name, view_name)
         handler.start()
 
         return handler
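
A sketch of the paused-start pattern recommended by the new docstrings for `listen` (above) and `foreach_chunk` (in the following hunks); the view name is hypothetical, and `to_pandas()` on the returned `OutputHandler` is an assumption from the wider SDK:

    import pandas

    def on_chunk(df: pandas.DataFrame, seq_no: int) -> None:
        # Callback shape per the foreach_chunk docstring: chunk plus sequence number.
        print(f"chunk {seq_no}: {len(df)} rows")

    pipeline.start_paused()              # attach listeners before any data flows
    handle = pipeline.listen("totals")   # "totals" is a hypothetical view name
    pipeline.foreach_chunk("totals", on_chunk)
    pipeline.resume()
    pipeline.wait_for_completion()
    df = handle.to_pandas()              # assumed OutputHandler accessor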
@@ -234,8 +273,9 @@ class Pipeline:
         """
         Run the given callback on each chunk of the output of the specified view.
 
-        You must call this method before starting the pipeline to operate on the entire output.
-        You can call this method after the pipeline has started, but you will only get the output from that point onwards.
+        The callback will only receive changes from the point in time when the listener is created.
+        In order to receive all changes since the pipeline started, you can create the pipeline in the `PAUSED` state
+        using :meth:`start_paused`, attach listeners and unpause the pipeline using :meth:`resume`.
 
         :param view_name: The name of the view.
         :param callback: The callback to run on each chunk. The callback should take two arguments:
@@ -253,34 +293,37 @@ class Pipeline:
 
         """
 
-        queue: Optional[Queue] = None
-
         if self.status() not in [PipelineStatus.RUNNING, PipelineStatus.PAUSED]:
-            queue = Queue(maxsize=1)
-            self.views_tx.append({view_name: queue})
+            raise RuntimeError("Pipeline must be running or paused to listen to output")
 
-        handler = CallbackRunner(self.client, self.name, view_name, callback, queue)
+        event = Event()
+        handler = CallbackRunner(
+            self.client, self.name, view_name, callback, lambda exception: None, event
+        )
         handler.start()
+        event.wait()
 
     def wait_for_completion(
-        self, shutdown: bool = False, timeout_s: Optional[float] = None
+        self, force_stop: bool = False, timeout_s: float | None = None
     ):
         """
         Block until the pipeline has completed processing all input records.
 
-        This method blocks until (1) all input connectors attached to the pipeline
-        have finished reading their input data sources and issued end-of-input
-        notifications to the pipeline, and (2) all inputs received from these
-        connectors have been fully processed and corresponding outputs have been
-        sent out through the output connectors.
+        This method blocks until (1) all input connectors attached to the
+        pipeline have finished reading their input data sources and issued
+        end-of-input notifications to the pipeline, and (2) all inputs received
+        from these connectors have been fully processed and corresponding
+        outputs have been sent out through the output connectors.
 
         This method will block indefinitely if at least one of the input
         connectors attached to the pipeline is a streaming connector, such as
         Kafka, that does not issue the end-of-input notification.
 
-        :param shutdown: If True, the pipeline will be shutdown after completion. False by default.
-        :param timeout_s: Optional. The maximum time (in seconds) to wait for the pipeline to complete.
-            The default is None, which means wait indefinitely.
+        :param force_stop: If True, the pipeline will be forcibly stopped after
+            completion. False by default. No checkpoints will be made.
+        :param timeout_s: Optional. The maximum time (in seconds) to wait for
+            the pipeline to complete. The default is None, which means wait
+            indefinitely.
 
         :raises RuntimeError: If the pipeline returns unknown metrics.
         """
@@ -289,6 +332,7 @@ class Pipeline:
             PipelineStatus.RUNNING,
             PipelineStatus.INITIALIZING,
             PipelineStatus.PROVISIONING,
+            PipelineStatus.BOOTSTRAPPING,
         ]:
             raise RuntimeError("Pipeline must be running to wait for completion")
 
@@ -299,99 +343,44 @@ class Pipeline:
             elapsed = time.monotonic() - start_time
             if elapsed > timeout_s:
                 raise TimeoutError(
-                    f"timeout ({timeout_s}s) reached while waiting for pipeline '{
-                        self.name
-                    }' to complete"
+                    f"timeout ({timeout_s}s) reached while waiting for"
+                    f" pipeline '{self.name}' to complete"
                 )
             logging.debug(
-                f"waiting for pipeline {self.name} to complete: elapsed time {
-                    elapsed
-                }s, timeout: {timeout_s}s"
+                f"waiting for pipeline {self.name} to complete: elapsed"
+                f" time {elapsed}s, timeout: {timeout_s}s"
             )
 
-        metrics: dict = self.client.get_pipeline_stats(self.name).get(
-            "global_metrics"
-        )
-        pipeline_complete: bool = metrics.get("pipeline_complete")
-
+        pipeline_complete: bool = self.is_complete()
         if pipeline_complete is None:
             raise RuntimeError(
                 "received unknown metrics from the pipeline, pipeline_complete is None"
             )
-
-        if pipeline_complete:
+        elif pipeline_complete:
             break
 
         time.sleep(1)
 
-        if shutdown:
-            self.shutdown()
-
-    def __failed_check(self, next):
-        """
-        Checks if the pipeline is in FAILED state and raises an error if it is.
-        :meta private:
-        """
-        status = self.status()
-        if status == PipelineStatus.FAILED:
-            deployment_error = self.client.get_pipeline(self.name).deployment_error
-            error_msg = deployment_error.get("message", "")
-            raise RuntimeError(
-                f"""Cannot {next} pipeline '{self.name}' in FAILED state.
-The pipeline must be in SHUTDOWN state before it can be started, but it is currently in FAILED state.
-Use `Pipeline.shutdown()` method to shut down the pipeline.
-Error Message:
-{error_msg}"""
-            )
-
-    def start(self, timeout_s: Optional[float] = None):
-        """
-        .. _start:
-
-        Starts this pipeline.
-
-        The pipeline must be in SHUTDOWN state to start.
-        If the pipeline is in any other state, an error will be raised.
-        If the pipeline is in PAUSED state, use `.meth:resume` instead.
-        If the pipeline is in FAILED state, it must be shutdown before starting it again.
-
-        :param timeout_s: The maximum time (in seconds) to wait for the pipeline to start.
-
-        :raises RuntimeError: If the pipeline is not in SHUTDOWN state.
-        """
-
-        self.__failed_check("start")
-        status = self.status()
-        if status != PipelineStatus.SHUTDOWN:
-            raise RuntimeError(
-                f"""Cannot start pipeline '{self.name}' in state '{str(status.name)}'.
-The pipeline must be in SHUTDOWN state before it can be started.
-You can either shut down the pipeline using the `Pipeline.shutdown()` method or use `Pipeline.resume()` to \
-resume a paused pipeline."""
-            )
-
-        self.client.pause_pipeline(
-            self.name, "Unable to START the pipeline.", timeout_s
-        )
-        self.__setup_output_listeners()
-        self.resume(timeout_s)
+        if force_stop:
+            self.stop(force=True)
 
-    def restart(self, timeout_s: Optional[float] = None):
+    def is_complete(self) -> bool:
         """
-        Restarts the pipeline.
-
-        This method **SHUTS DOWN** the pipeline regardless of its current state and then starts it again.
+        Check if the pipeline has completed processing all input records.
 
-        :param timeout_s: The maximum time (in seconds) to wait for the pipeline to restart.
+        Returns True if (1) all input connectors attached to the
+        pipeline have finished reading their input data sources and issued
+        end-of-input notifications to the pipeline, and (2) all inputs received
+        from these connectors have been fully processed and corresponding
+        outputs have been sent out through the output connectors.
         """
 
-        self.shutdown(timeout_s)
-        self.start(timeout_s)
+        return self.stats().global_metrics.pipeline_complete
 
     def wait_for_idle(
         self,
         idle_interval_s: float = 5.0,
-        timeout_s: float = 600.0,
+        timeout_s: float | None = None,
         poll_interval_s: float = 0.2,
     ):
         """
@@ -411,17 +400,15 @@ resume a paused pipeline."""
         :raises RuntimeError: If the metrics are missing or the timeout was
             reached.
         """
-        if idle_interval_s > timeout_s:
+        if timeout_s is not None and idle_interval_s > timeout_s:
             raise ValueError(
-                f"idle interval ({idle_interval_s}s) cannot be larger than timeout ({
-                    timeout_s
-                }s)"
+                f"idle interval ({idle_interval_s}s) cannot be larger than"
+                f" timeout ({timeout_s}s)"
             )
-        if poll_interval_s > timeout_s:
+        if timeout_s is not None and poll_interval_s > timeout_s:
             raise ValueError(
-                f"poll interval ({poll_interval_s}s) cannot be larger than timeout ({
-                    timeout_s
-                }s)"
+                f"poll interval ({poll_interval_s}s) cannot be larger than"
+                f" timeout ({timeout_s}s)"
             )
         if poll_interval_s > idle_interval_s:
             raise ValueError(
@@ -436,18 +423,17 @@ resume a paused pipeline."""
             now_s = time.monotonic()
 
             # Metrics retrieval
-            metrics: dict = self.client.get_pipeline_stats(self.name).get(
-                "global_metrics"
-            )
-            total_input_records: int | None = metrics.get("total_input_records")
-            total_processed_records: int | None = metrics.get("total_processed_records")
-            if total_input_records is None:
+            metrics = self.stats().global_metrics
+            total_input_records = metrics.total_input_records
+            total_processed_records = metrics.total_processed_records
+            if metrics.total_input_records is None:
                 raise RuntimeError(
                     "total_input_records is missing from the pipeline metrics"
                 )
-            if total_processed_records is None:
+            if metrics.total_processed_records is None:
                 raise RuntimeError(
-                    "total_processed_records is missing from the pipeline metrics"
+                    """total_processed_records is missing from the pipeline \
+metrics"""
                 )
 
             # Idle check
@@ -465,80 +451,236 @@ resume a paused pipeline."""
             return
 
         # Timeout
-        if now_s - start_time_s >= timeout_s:
+        if timeout_s is not None and now_s - start_time_s >= timeout_s:
             raise RuntimeError(f"waiting for idle reached timeout ({timeout_s}s)")
         time.sleep(poll_interval_s)
 
-    def pause(self, timeout_s: Optional[float] = None):
+    def activate(
+        self, wait: bool = True, timeout_s: Optional[float] = None
+    ) -> Optional[PipelineStatus]:
+        """
+        Activates the pipeline when starting from STANDBY mode. Only applicable
+        when the pipeline is starting from a checkpoint in object store.
+
+        :param wait: Set True to wait for the pipeline to activate. True by
+            default.
+        :param timeout_s: The maximum time (in seconds) to wait for the
+            pipeline to activate.
+        """
+
+        return self.client.activate_pipeline(self.name, wait=wait, timeout_s=timeout_s)
+
+    def start(
+        self,
+        bootstrap_policy: Optional[BootstrapPolicy] = None,
+        wait: bool = True,
+        timeout_s: Optional[float] = None,
+    ):
+        """
+        .. _start:
+
+        Starts this pipeline.
+
+        - The pipeline must be in STOPPED state to start.
+        - If the pipeline is in any other state, an error will be raised.
+        - If the pipeline is in PAUSED state, use :meth:`resume` instead.
+
+        :param bootstrap_policy: The bootstrap policy to use.
+        :param timeout_s: The maximum time (in seconds) to wait for the
+            pipeline to start.
+        :param wait: Set True to wait for the pipeline to start. True by default.
+
+        :raises RuntimeError: If the pipeline is not in STOPPED state.
+        """
+
+        self.client.start_pipeline(
+            self.name, bootstrap_policy=bootstrap_policy, wait=wait, timeout_s=timeout_s
+        )
+
+    def start_paused(
+        self,
+        bootstrap_policy: Optional[BootstrapPolicy] = None,
+        wait: bool = True,
+        timeout_s: Optional[float] = None,
+    ):
+        """
+        Starts the pipeline in the paused state.
+        """
+
+        return self.client.start_pipeline_as_paused(
+            self.name, bootstrap_policy=bootstrap_policy, wait=wait, timeout_s=timeout_s
+        )
+
+    def start_standby(
+        self,
+        bootstrap_policy: Optional[BootstrapPolicy] = None,
+        wait: bool = True,
+        timeout_s: Optional[float] = None,
+    ):
+        """
+        Starts the pipeline in the standby state.
+        """
+
+        self.client.start_pipeline_as_standby(
+            self.name, bootstrap_policy=bootstrap_policy, wait=wait, timeout_s=timeout_s
+        )
+
+    def restart(
+        self,
+        bootstrap_policy: Optional[BootstrapPolicy] = None,
+        timeout_s: Optional[float] = None,
+    ):
+        """
+        Restarts the pipeline.
+
+        This method forcibly **STOPS** the pipeline regardless of its current
+        state and then starts it again. No checkpoints are made when stopping
+        the pipeline.
+
+        :param bootstrap_policy: The bootstrap policy to use.
+        :param timeout_s: The maximum time (in seconds) to wait for the
+            pipeline to restart.
+        """
+
+        self.stop(force=True, timeout_s=timeout_s)
+        self.start(bootstrap_policy=bootstrap_policy, timeout_s=timeout_s)
+
+    def pause(self, wait: bool = True, timeout_s: Optional[float] = None):
         """
         Pause the pipeline.
 
-        The pipeline can only transition to the PAUSED state from the RUNNING state.
-        If the pipeline is already paused, it will remain in the PAUSED state.
+        The pipeline can only transition to the PAUSED state from the RUNNING
+        state. If the pipeline is already paused, it will remain in the PAUSED
+        state.
+
+        :param wait: Set True to wait for the pipeline to pause. True by default.
+        :param timeout_s: The maximum time (in seconds) to wait for the
+            pipeline to pause.
+        """
 
-        :param timeout_s: The maximum time (in seconds) to wait for the pipeline to pause.
+        self.client.pause_pipeline(self.name, wait=wait, timeout_s=timeout_s)
 
-        :raises FelderaAPIError: If the pipeline is in FAILED state.
+    def stop(self, force: bool, wait: bool = True, timeout_s: Optional[float] = None):
         """
+        Stops the pipeline.
 
-        self.__failed_check("pause")
-        self.client.pause_pipeline(self.name, timeout_s=timeout_s)
+        Stops the pipeline regardless of its current state.
 
-    def shutdown(self, timeout_s: Optional[float] = None):
+        :param force: Set True to immediately scale compute resources to zero.
+            Set False to automatically checkpoint before stopping.
+        :param wait: Set True to gracefully shutdown listeners and wait for the
+            pipeline to stop. True by default.
+        :param timeout_s: The maximum time (in seconds) to wait for the
+            pipeline to stop.
         """
-        Shut down the pipeline.
 
-        Shuts down the pipeline regardless of its current state.
+        self.client.stop_pipeline(
+            self.name, force=force, wait=wait, timeout_s=timeout_s
+        )
+
+    def approve(self):
+        """
+        Approves the pipeline to proceed with bootstrapping.
 
-        :param timeout_s: The maximum time (in seconds) to wait for the pipeline to shut down.
+        This method is used when a pipeline has been started with
+        `bootstrap_policy=BootstrapPolicy.AWAIT_APPROVAL` and is currently in the
+        AWAITINGAPPROVAL state. The pipeline will wait for explicit user approval
+        before proceeding with the bootstrapping process.
         """
 
-        if len(self.views_tx) > 0:
-            for _, queue in self.views_tx.pop().items():
-                # sends a message to the callback runner to stop listening
-                queue.put(_CallbackRunnerInstruction.RanToCompletion)
-                # block until the callback runner has been stopped
-                queue.join()
+        self.client.approve_pipeline(self.name)
+
+    def resume(self, wait: bool = True, timeout_s: Optional[float] = None):
+        """
+        Resumes the pipeline from the PAUSED state. If the pipeline is already
+        running, it will remain in the RUNNING state.
+
+        :param wait: Set True to wait for the pipeline to resume. True by default.
+        :param timeout_s: The maximum time (in seconds) to wait for the
+            pipeline to resume.
+        """
+
+        self.client.resume_pipeline(self.name, wait=wait, timeout_s=timeout_s)
+
+    def start_transaction(self) -> int:
+        """
+        Start a new transaction.
+
+        :return: Transaction ID.
+
+        :raises FelderaAPIError: If the pipeline fails to start a transaction, e.g., if the pipeline is not running or
+            there is already an active transaction.
+        """
 
-        self.client.shutdown_pipeline(self.name, timeout_s=timeout_s)
+        return self.client.start_transaction(self.name)
 
-    def suspend(self, timeout_s: Optional[float] = None):
+    def commit_transaction(
+        self,
+        transaction_id: Optional[int] = None,
+        wait: bool = True,
+        timeout_s: Optional[float] = None,
+    ):
         """
-        Suspends the pipeline to storage.
+        Commit the currently active transaction.
+
+        :param transaction_id: If provided, the function verifies that the currently active transaction matches this ID.
+            If the active transaction ID does not match, the function raises an error.
 
-        :param timeout_s: The maximum time (in seconds) to wait for the pipeline to suspend.
+        :param wait: If True, the function blocks until the transaction either commits successfully or the timeout is reached.
+            If False, the function initiates the commit and returns immediately without waiting for completion. The default value is True.
+
+        :param timeout_s: Maximum time (in seconds) to wait for the transaction to commit when `wait` is True.
+            If None, the function will wait indefinitely.
+
+        :raises RuntimeError: If there is currently no transaction in progress.
+        :raises ValueError: If the provided `transaction_id` does not match the current transaction.
+        :raises TimeoutError: If the transaction does not commit within the specified timeout (when `wait` is True).
+        :raises FelderaAPIError: If the pipeline fails to commit a transaction.
         """
 
-        if len(self.views_tx) > 0:
-            for _, queue in self.views_tx.pop().items():
-                # sends a message to the callback runner to stop listening
-                queue.put(_CallbackRunnerInstruction.RanToCompletion)
-                # block until the callback runner has been stopped
-                queue.join()
+        self.client.commit_transaction(self.name, transaction_id, wait, timeout_s)
+
+    def transaction_status(self) -> TransactionStatus:
+        """
+        Get pipeline's transaction handling status.
 
-        self.client.suspend_pipeline(self.name, timeout_s=timeout_s)
+        :return: Current transaction handling status of the pipeline.
 
-    def resume(self, timeout_s: Optional[float] = None):
+        :raises FelderaAPIError: If the pipeline's status couldn't be read, e.g., because the pipeline is not currently running.
         """
-        Resumes the pipeline from the PAUSED state. If the pipeline is already running, it will remain in the RUNNING state.
 
-        :param timeout_s: The maximum time (in seconds) to wait for the pipeline to shut down.
+        return self.stats().global_metrics.transaction_status
 
-        :raises FelderaAPIError: If the pipeline is in FAILED state.
+    def transaction_id(self) -> Optional[int]:
         """
+        Gets the ID of the currently active transaction or None if there is no active transaction.
 
-        self.__failed_check("resume")
-        self.client.start_pipeline(self.name, timeout_s=timeout_s)
+        :return: The ID of the transaction.
+        """
+
+        transaction_id = self.stats().global_metrics.transaction_id
+
+        if transaction_id == 0:
+            return None
+        else:
+            return transaction_id
 
-    def delete(self):
+    def delete(self, clear_storage: bool = False):
         """
         Deletes the pipeline.
 
-        The pipeline must be shutdown before it can be deleted.
+        The pipeline must be stopped, and the storage cleared before it can be
+        deleted.
 
-        :raises FelderaAPIError: If the pipeline is not in SHUTDOWN state.
+        :param clear_storage: True if the storage should be cleared before
+            deletion. False by default.
+
+        :raises FelderaAPIError: If the pipeline is not in STOPPED state or the
+            storage is still bound.
         """
 
+        if clear_storage:
+            self.clear_storage()
         self.client.delete_pipeline(self.name)
 
     @staticmethod
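
The reworked lifecycle and the new transaction API from this hunk, sketched end to end (table name, values, and timeouts are hypothetical):

    txn_id = pipeline.start_transaction()
    pipeline.input_json("events", [{"id": 3, "value": 7.5}], wait=False)
    pipeline.execute("INSERT INTO events VALUES (4, 0.5)")
    assert pipeline.transaction_id() == txn_id
    pipeline.commit_transaction(txn_id, wait=True, timeout_s=60)

    pipeline.stop(force=False)            # checkpoint first, then stop
    pipeline.delete(clear_storage=True)   # requires STOPPED state and unbound storage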
@@ -551,21 +693,35 @@ resume a paused pipeline."""
         """
 
         try:
-            inner = client.get_pipeline(name)
+            inner = client.get_pipeline(name, PipelineFieldSelector.ALL)
             return Pipeline._from_inner(inner, client)
         except FelderaAPIError as err:
             if err.status_code == 404:
-                raise RuntimeError(f"Pipeline with name {name} not found")
+                err.message = f"Pipeline with name {name} not found"
+                raise err
+
+    @staticmethod
+    def all(client: FelderaClient) -> List["Pipeline"]:
+        """
+        Get all pipelines.
+
+        :param client: The FelderaClient instance.
+        :return: A list of Pipeline objects.
+        """
+
+        return [Pipeline._from_inner(p, client) for p in client.pipelines()]
 
-    def checkpoint(self, wait: bool = False, timeout_s=300) -> int:
+    def checkpoint(self, wait: bool = False, timeout_s: Optional[float] = None) -> int:
         """
-        Checkpoints this pipeline, if fault-tolerance is enabled.
-        Fault Tolerance in Feldera: <https://docs.feldera.com/pipelines/fault-tolerance/>
+        Checkpoints this pipeline.
 
         :param wait: If true, will block until the checkpoint completes.
-        :param timeout_s: The maximum time (in seconds) to wait for the checkpoint to complete.
+        :param timeout_s: The maximum time (in seconds) to wait for the
+            checkpoint to complete.
 
-        :raises FelderaAPIError: If checkpointing is not enabled.
+        :return: The checkpoint sequence number.
+
+        :raises FelderaAPIError: If enterprise features are not enabled.
         """
 
         seq = self.client.checkpoint_pipeline(self.name)
@@ -577,20 +733,17 @@ resume a paused pipeline."""
 
         while True:
             elapsed = time.monotonic() - start
-            if elapsed > timeout_s:
+            if timeout_s is not None and elapsed > timeout_s:
                 raise TimeoutError(
-                    f"timeout ({timeout_s}s) reached while waiting for pipeline '{
-                        self.name
-                    }' to make checkpoint '{seq}'"
+                    f"""timeout ({timeout_s}s) reached while waiting for \
+pipeline '{self.name}' to make checkpoint '{seq}'"""
                 )
             status = self.checkpoint_status(seq)
             if status == CheckpointStatus.InProgress:
                 time.sleep(0.1)
                 continue
 
-            return status
-
-        return seq
+            return seq
 
     def checkpoint_status(self, seq: int) -> CheckpointStatus:
         """
@@ -616,14 +769,19 @@ resume a paused pipeline."""
         if seq < success:
             return CheckpointStatus.Unknown
 
-    def sync_checkpoint(self, wait: bool = False, timeout_s=300) -> str:
+    def sync_checkpoint(
+        self, wait: bool = False, timeout_s: Optional[float] = None
+    ) -> str:
         """
         Syncs this checkpoint to object store.
 
-        :param wait: If true, will block until the checkpoint sync opeartion completes.
-        :param timeout_s: The maximum time (in seconds) to wait for the checkpoint to complete syncing.
+        :param wait: If true, will block until the checkpoint sync operation
+            completes.
+        :param timeout_s: The maximum time (in seconds) to wait for the
+            checkpoint to complete syncing.
 
         :raises FelderaAPIError: If no checkpoints have been made.
+        :raises RuntimeError: If syncing the checkpoint fails.
         """
 
         uuid = self.client.sync_checkpoint(self.name)
@@ -635,18 +793,22 @@ resume a paused pipeline."""
 
         while True:
             elapsed = time.monotonic() - start
-            if elapsed > timeout_s:
+            if timeout_s is not None and elapsed > timeout_s:
                 raise TimeoutError(
-                    f"timeout ({timeout_s}s) reached while waiting for pipeline '{
-                        self.name
-                    }' to sync checkpoint '{uuid}'"
+                    f"""timeout ({timeout_s}s) reached while waiting for \
+pipeline '{self.name}' to sync checkpoint '{uuid}'"""
                 )
             status = self.sync_checkpoint_status(uuid)
+            if status == CheckpointStatus.Failure:
+                raise RuntimeError(
+                    f"failed to sync checkpoint '{uuid}': ", status.get_error()
+                )
+
             if status in [CheckpointStatus.InProgress, CheckpointStatus.Unknown]:
                 time.sleep(0.1)
                 continue
 
-            return status
+            break
 
         return uuid
 
@@ -656,12 +818,17 @@ resume a paused pipeline."""
         If the checkpoint is currently being synchronized, returns
         `CheckpointStatus.Unknown`.
 
+        Failures are not raised as runtime errors and must be explicitly
+        checked.
+
         :param uuid: The checkpoint uuid.
         """
 
         resp = self.client.sync_checkpoint_status(self.name)
         success = resp.get("success")
 
+        fail = resp.get("failure") or {}
+
         if uuid == success:
             return CheckpointStatus.Success
 
@@ -669,26 +836,35 @@ resume a paused pipeline."""
         if uuid == fail.get("uuid"):
             failure = CheckpointStatus.Failure
             failure.error = fail.get("error", "")
+            logging.error(f"failed to sync checkpoint '{uuid}': {failure.error}")
             return failure
 
+        if (success is None) or UUID(uuid) > UUID(success):
+            return CheckpointStatus.InProgress
+
         return CheckpointStatus.Unknown
 
     def query(self, query: str) -> Generator[Mapping[str, Any], None, None]:
         """
-        Executes an ad-hoc SQL query on this pipeline and returns a generator that yields the rows of the result as Python dictionaries.
-        For ``INSERT`` and ``DELETE`` queries, consider using :meth:`.execute` instead.
-        All floating-point numbers are deserialized as Decimal objects to avoid precision loss.
+        Executes an ad-hoc SQL query on this pipeline and returns a generator
+        that yields the rows of the result as Python dictionaries. For
+        ``INSERT`` and ``DELETE`` queries, consider using :meth:`.execute`
+        instead. All floating-point numbers are deserialized as Decimal objects
+        to avoid precision loss.
 
         Note:
             You can only ``SELECT`` from materialized tables and views.
 
         Important:
-            This method is lazy. It returns a generator and is not evaluated until you consume the result.
+            This method is lazy. It returns a generator and is not evaluated
+            until you consume the result.
 
         :param query: The SQL query to be executed.
-        :return: A generator that yields the rows of the result as Python dictionaries.
+        :return: A generator that yields the rows of the result as Python
+            dictionaries.
 
-        :raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED state.
+        :raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
+            state.
         :raises FelderaAPIError: If querying a non materialized table or view.
         :raises FelderaAPIError: If the query is invalid.
         """
@@ -697,8 +873,9 @@ resume a paused pipeline."""
 
     def query_parquet(self, query: str, path: str):
         """
-        Executes an ad-hoc SQL query on this pipeline and saves the result to the specified path as a parquet file.
-        If the extension isn't `parquet`, it will be automatically appended to `path`.
+        Executes an ad-hoc SQL query on this pipeline and saves the result to
+        the specified path as a parquet file. If the extension isn't `parquet`,
+        it will be automatically appended to `path`.
 
         Note:
             You can only ``SELECT`` from materialized tables and views.
@@ -706,7 +883,8 @@ resume a paused pipeline."""
         :param query: The SQL query to be executed.
         :param path: The path of the parquet file.
 
-        :raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED state.
+        :raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
+            state.
         :raises FelderaAPIError: If querying a non materialized table or view.
         :raises FelderaAPIError: If the query is invalid.
         """
@@ -715,37 +893,62 @@ resume a paused pipeline."""
 
     def query_tabular(self, query: str) -> Generator[str, None, None]:
         """
-        Executes a SQL query on this pipeline and returns the result as a formatted string.
+        Executes a SQL query on this pipeline and returns the result as a
+        formatted string.
 
         Note:
             You can only ``SELECT`` from materialized tables and views.
 
         Important:
-            This method is lazy. It returns a generator and is not evaluated until you consume the result.
+            This method is lazy. It returns a generator and is not evaluated
+            until you consume the result.
 
         :param query: The SQL query to be executed.
-        :return: A generator that yields a string representing the query result in a human-readable, tabular format.
+        :return: A generator that yields a string representing the query result
+            in a human-readable, tabular format.
 
-        :raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED state.
+        :raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
+            state.
         :raises FelderaAPIError: If querying a non materialized table or view.
         :raises FelderaAPIError: If the query is invalid.
         """
 
         return self.client.query_as_text(self.name, query)
 
+    def query_hash(self, query: str):
+        """
+        Executes an ad-hoc SQL query on this pipeline and returns the result
+        as a hash of the result set. This is useful for quickly checking
+        if the result set has changed without retrieving the entire result.
+
+        Note:
+            For a stable hash, the query must be deterministic, which means
+            its result should be sorted.
+
+        :param query: The SQL query to be executed.
+
+        :raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
+            state.
+        :raises FelderaAPIError: If querying a non materialized table or view.
+        :raises FelderaAPIError: If the query is invalid.
+        """
+        return self.client.query_as_hash(self.name, query)
+
     def execute(self, query: str):
         """
-        Executes an ad-hoc SQL query on the current pipeline, discarding its result.
-        Unlike the :meth:`.query` method which returns a generator for retrieving query results lazily,
-        this method processes the query eagerly and fully before returning.
+        Executes an ad-hoc SQL query on the current pipeline, discarding its
+        result. Unlike the :meth:`.query` method which returns a generator for
+        retrieving query results lazily, this method processes the query
+        eagerly and fully before returning.
 
-        This method is suitable for SQL operations like ``INSERT`` and ``DELETE``, where the user needs
-        confirmation of successful query execution, but does not require the query result.
-        If the query fails, an exception will be raised.
+        This method is suitable for SQL operations like ``INSERT`` and
+        ``DELETE``, where the user needs confirmation of successful query
+        execution, but does not require the query result. If the query fails,
+        an exception will be raised.
 
         Important:
-            If you try to ``INSERT`` or ``DELETE`` data from a table while the pipeline is paused,
-            it will block until the pipeline is resumed.
+            If you try to ``INSERT`` or ``DELETE`` data from a table while the
+            pipeline is paused, it will block until the pipeline is resumed.
 
         :param query: The SQL query to be executed.
 
@@ -756,6 +959,16 @@ resume a paused pipeline."""
         gen = self.query_tabular(query)
         deque(gen, maxlen=0)
 
+    def clear_storage(self):
+        """
+        Clears the storage of the pipeline if it is currently in use.
+        This action cannot be canceled, and will delete all the pipeline
+        storage.
+        """
+
+        if self.storage_status() == StorageStatus.INUSE:
+            self.client.clear_storage(self.name)
+
     @property
     def name(self) -> str:
         """
@@ -769,34 +982,124 @@ resume a paused pipeline."""
         Return the program SQL code of the pipeline.
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.ALL)
         return self._inner.program_code
 
+    def modify(
+        self,
+        sql: Optional[str] = None,
+        udf_rust: Optional[str] = None,
+        udf_toml: Optional[str] = None,
+        program_config: Optional[Mapping[str, Any]] = None,
+        runtime_config: Optional[Mapping[str, Any]] = None,
+        description: Optional[str] = None,
+    ):
+        """
+        Modify the pipeline.
+
+        Modify the values of pipeline attributes: SQL code, UDF Rust code,
+        UDF Rust dependencies (TOML), program config, runtime config, and
+        description. Only the provided attributes will be modified. Other
+        attributes will remain unchanged.
+
+        The pipeline must be in the STOPPED state to be modified.
+
+        :raises FelderaAPIError: If the pipeline is not in a STOPPED state.
+        """
+
+        self.client.patch_pipeline(
+            name=self._inner.name,
+            sql=sql,
+            udf_rust=udf_rust,
+            udf_toml=udf_toml,
+            program_config=program_config,
+            runtime_config=runtime_config,
+            description=description,
+        )
+
+    def update_runtime(self):
+        """
+        Recompile a pipeline with the Feldera runtime version included in the
+        currently installed Feldera platform.
+
+        Use this endpoint after upgrading Feldera to rebuild pipelines that were
+        compiled with older platform versions. In most cases, recompilation is not
+        required; pipelines compiled with older versions will continue to run on the
+        upgraded platform.
+
+        Situations where recompilation may be necessary:
+
+        - To benefit from the latest bug fixes and performance optimizations.
+        - When backward-incompatible changes are introduced in Feldera. In this case,
+          attempting to start a pipeline compiled with an unsupported version will
+          result in an error.
+
+        If the pipeline is already compiled with the current platform version,
+        this operation is a no-op.
+
+        Note that recompiling the pipeline with a new platform version may change its
+        query plan. If the modified pipeline is started from an existing checkpoint,
+        it may require bootstrapping parts of its state from scratch. See the Feldera
+        documentation for details on the bootstrapping process.
+
+        :raises FelderaAPIError: If the pipeline is not in a STOPPED state.
+        """
+
+        self.client.update_pipeline_runtime(self._inner.name)
+
+    def storage_status(self) -> StorageStatus:
+        """
+        Return the storage status of the pipeline.
+        """
+
+        self.refresh(PipelineFieldSelector.STATUS)
+        return StorageStatus.from_str(self._inner.storage_status)
+
     def program_status(self) -> ProgramStatus:
         """
         Return the program status of the pipeline.
 
         Program status is the status of compilation of this SQL program.
-        We first compile the SQL program to Rust code, and then compile the Rust code to a binary.
+        We first compile the SQL program to Rust code, and then compile the
+        Rust code to a binary.
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.STATUS)
         return ProgramStatus.from_value(self._inner.program_status)
 
+    def testing_force_update_platform_version(self, platform_version: str):
+        """
+        Used to simulate a pipeline compiled with a different platform version than the one currently in use.
+        This is useful for testing platform upgrade behavior without actually upgrading Feldera.
+
+        This method is only available when Feldera runs with the "testing" unstable feature enabled.
+        """
+
+        self.client.testing_force_update_platform_version(
+            name=self._inner.name, platform_version=platform_version
+        )
+
     def program_status_since(self) -> datetime:
         """
         Return the timestamp when the current program status was set.
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.STATUS)
         return datetime.fromisoformat(self._inner.program_status_since)
 
+    def platform_version(self) -> str:
+        """
+        Return the Feldera platform version with which the program was compiled.
+        """
+
+        self.refresh(PipelineFieldSelector.STATUS)
+        return self._inner.platform_version
+
     def udf_rust(self) -> str:
         """
         Return the Rust code for UDFs.
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.ALL)
         return self._inner.udf_rust
 
     def udf_toml(self) -> str:
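
A sketch of the stopped-state editing flow enabled by `modify` and `update_runtime` above (the SQL is hypothetical):

    pipeline.stop(force=False)
    pipeline.modify(sql="CREATE TABLE events (id INT, value DOUBLE);")
    pipeline.update_runtime()   # no-op if already on the installed platform version
    pipeline.start()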
@@ -804,7 +1107,7 @@ resume a paused pipeline."""
         Return the Rust dependencies required by UDFs (in the TOML format).
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.ALL)
         return self._inner.udf_toml
 
     def program_config(self) -> Mapping[str, Any]:
@@ -812,23 +1115,40 @@ resume a paused pipeline."""
         Return the program config of the pipeline.
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.ALL)
         return self._inner.program_config
 
-    def runtime_config(self) -> Mapping[str, Any]:
+    def runtime_config(self) -> RuntimeConfig:
         """
         Return the runtime config of the pipeline.
         """
 
-        self.refresh()
-        return self._inner.runtime_config
+        self.refresh(PipelineFieldSelector.ALL)
+        return RuntimeConfig.from_dict(self._inner.runtime_config)
+
+    def set_runtime_config(self, runtime_config: RuntimeConfig):
+        """Updates the runtime config of the pipeline. The pipeline
+        must be stopped. Changing some pipeline configuration, such
+        as the number of workers, requires storage to be cleared.
+
+        For example, to set 'min_batch_size_records' on a pipeline::
+
+            runtime_config = pipeline.runtime_config()
+            runtime_config.min_batch_size_records = 500
+            pipeline.set_runtime_config(runtime_config)
+
+        """
+
+        self.client.patch_pipeline(
+            name=self._inner.name, runtime_config=runtime_config.to_dict()
+        )
 
     def id(self) -> str:
         """
         Return the ID of the pipeline.
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.STATUS)
         return self._inner.id
 
     def description(self) -> str:
@@ -836,7 +1156,7 @@ resume a paused pipeline."""
         Return the description of the pipeline.
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.STATUS)
         return self._inner.description
 
     def tables(self) -> List[SQLTable]:
@@ -844,7 +1164,7 @@ resume a paused pipeline."""
         Return the tables of the pipeline.
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.ALL)
         return self._inner.tables
 
     def views(self) -> List[SQLView]:
@@ -852,7 +1172,7 @@ resume a paused pipeline."""
         Return the views of the pipeline.
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.ALL)
         return self._inner.views
 
     def created_at(self) -> datetime:
@@ -860,7 +1180,7 @@ resume a paused pipeline."""
         Return the creation time of the pipeline.
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.STATUS)
         return datetime.fromisoformat(self._inner.created_at)
 
     def version(self) -> int:
@@ -868,7 +1188,7 @@ resume a paused pipeline."""
         Return the version of the pipeline.
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.STATUS)
         return self._inner.version
 
     def program_version(self) -> int:
@@ -876,15 +1196,16 @@ resume a paused pipeline."""
         Return the program version of the pipeline.
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.STATUS)
         return self._inner.program_version
 
     def deployment_status_since(self) -> datetime:
         """
-        Return the timestamp when the current deployment status of the pipeline was set.
+        Return the timestamp when the current deployment status of the pipeline
+        was set.
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.STATUS)
         return datetime.fromisoformat(self._inner.deployment_status_since)
 
     def deployment_config(self) -> Mapping[str, Any]:
@@ -892,17 +1213,63 @@ resume a paused pipeline."""
         Return the deployment config of the pipeline.
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.ALL)
         return self._inner.deployment_config
 
-    def deployment_desired_status(self) -> PipelineStatus:
+    def deployment_desired_status(self) -> DeploymentDesiredStatus:
         """
         Return the desired deployment status of the pipeline.
         This is the next state that the pipeline should transition to.
         """
 
-        self.refresh()
-        return PipelineStatus.from_str(self._inner.deployment_desired_status)
+        self.refresh(PipelineFieldSelector.STATUS)
+        return DeploymentDesiredStatus.from_str(self._inner.deployment_desired_status)
+
+    def deployment_resources_desired_status(self) -> DeploymentResourcesDesiredStatus:
+        """
+        Return the desired status of the deployment resources.
+        """
+
+        self.refresh(PipelineFieldSelector.STATUS)
+        return DeploymentResourcesDesiredStatus.from_str(
+            self._inner.deployment_resources_desired_status
+        )
+
+    def deployment_resources_status(self) -> DeploymentResourcesStatus:
+        """
+        Return the status of the deployment resources.
+        """
+
+        self.refresh(PipelineFieldSelector.STATUS)
+        return DeploymentResourcesStatus.from_str(
+            self._inner.deployment_resources_status
+        )
+
+    def deployment_runtime_desired_status(self) -> DeploymentRuntimeDesiredStatus:
+        """
+        Return the deployment runtime desired status.
+        """
+
+        self.refresh(PipelineFieldSelector.STATUS)
+        return DeploymentRuntimeDesiredStatus.from_str(
+            self._inner.deployment_runtime_desired_status
+        )
+
+    def deployment_runtime_status(self) -> DeploymentRuntimeStatus:
+        """
+        Return the deployment runtime status.
+        """
+
+        self.refresh(PipelineFieldSelector.STATUS)
+        return DeploymentRuntimeStatus.from_str(self._inner.deployment_runtime_status)
+
+    def deployment_runtime_status_details(self) -> Optional[dict]:
+        """
+        Return the deployment runtime status details.
+        """
+
+        self.refresh(PipelineFieldSelector.STATUS)
+        return self._inner.deployment_runtime_status_details
 
     def deployment_error(self) -> Mapping[str, Any]:
         """
@@ -910,43 +1277,38 @@ resume a paused pipeline."""
         Returns an empty string if there is no error.
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.STATUS)
         return self._inner.deployment_error
 
     def deployment_location(self) -> str:
         """
         Return the deployment location of the pipeline.
-        Deployment location is the location where the pipeline can be reached at runtime (a TCP port number or a URI).
+        Deployment location is the location where the pipeline can be reached
+        at runtime (a TCP port number or a URI).
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.STATUS)
         return self._inner.deployment_location
 
-    def program_binary_url(self) -> str:
-        """
-        Return the program binary URL of the pipeline.
-        This is the URL where the compiled program binary can be downloaded from.
-        """
-
-        self.refresh()
-        return self._inner.program_binary_url
-
     def program_info(self) -> Mapping[str, Any]:
         """
         Return the program info of the pipeline.
-        This is the output returned by the SQL compiler, including: the list of input and output connectors, the generated Rust code for the pipeline, and the SQL program schema.
+        This is the output returned by the SQL compiler, including: the list of
+        input and output connectors, the generated Rust code for the pipeline,
+        and the SQL program schema.
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.ALL)
         return self._inner.program_info
 
     def program_error(self) -> Mapping[str, Any]:
         """
         Return the program error of the pipeline.
-        If there are no errors, the `exit_code` field inside both `sql_compilation` and `rust_compilation` will be 0.
+        If there are no errors, the `exit_code` field inside both
+        `sql_compilation` and `rust_compilation` will be 0.
         """
 
-        self.refresh()
+        self.refresh(PipelineFieldSelector.ALL)
         return self._inner.program_error
 
     def errors(self) -> List[Mapping[str, Any]]:
@@ -963,3 +1325,98 @@ resume a paused pipeline."""
         if derr:
             errors.append(derr)
         return errors
+
+    def support_bundle(
+        self,
+        output_path: Optional[str] = None,
+        *,
+        circuit_profile: bool = True,
+        heap_profile: bool = True,
+        metrics: bool = True,
+        logs: bool = True,
+        stats: bool = True,
+        pipeline_config: bool = True,
+        system_config: bool = True,
+    ) -> bytes:
+        """
+        Generate a support bundle containing diagnostic information from this pipeline.
+
+        This method collects various diagnostic data from the pipeline, including
+        circuit profile, heap profile, metrics, logs, stats, and connector statistics,
+        and packages them into a single ZIP file for support purposes.
+
+        :param output_path: Optional path to save the support bundle file. If None,
+            the support bundle is only returned as bytes.
+        :param circuit_profile: Whether to collect circuit profile data (default: True)
+        :param heap_profile: Whether to collect heap profile data (default: True)
+        :param metrics: Whether to collect metrics data (default: True)
+        :param logs: Whether to collect logs data (default: True)
+        :param stats: Whether to collect stats data (default: True)
+        :param pipeline_config: Whether to collect pipeline configuration data (default: True)
+        :param system_config: Whether to collect system configuration data (default: True)
+        :return: The support bundle as bytes (ZIP archive)
+        :raises FelderaAPIError: If the pipeline does not exist or if there's an error
+            while collecting the bundle.
+        """
+
+        # Build query parameters
+        params = {}
+        if not circuit_profile:
+            params["circuit_profile"] = "false"
+        if not heap_profile:
+            params["heap_profile"] = "false"
+        if not metrics:
+            params["metrics"] = "false"
+        if not logs:
+            params["logs"] = "false"
+        if not stats:
+            params["stats"] = "false"
+        if not pipeline_config:
+            params["pipeline_config"] = "false"
+        if not system_config:
+            params["system_config"] = "false"
+
+        support_bundle_bytes = self.client.get_pipeline_support_bundle(
+            self.name, params=params
+        )
+
+        if output_path is not None:
+            path = pathlib.Path(output_path)
+
+            # Ensure the file has a .zip extension
+            if path.suffix != ".zip":
+                path = path.with_suffix(".zip")
+
+            with open(path, "wb") as f:
+                f.write(support_bundle_bytes)
+
+            print(f"Support bundle written to {path}")
+
+        return support_bundle_bytes
+
+    def generate_completion_token(self, table_name: str, connector_name: str) -> str:
+        """
+        Returns a completion token that can be passed to :meth:`.Pipeline.completion_token_status` to
+        check whether the pipeline has finished processing all inputs received from the connector before
+        the token was generated.
+        """
+
+        return self.client.generate_completion_token(
+            self.name, table_name, connector_name
+        )
+
+    def completion_token_status(self, token: str) -> CompletionTokenStatus:
+        """
+        Returns the status of the completion token.
+        """
+
+        if self.client.completion_token_processed(self.name, token):
+            return CompletionTokenStatus.COMPLETE
+        else:
+            return CompletionTokenStatus.IN_PROGRESS
+
+    def wait_for_token(self, token: str):
+        """
+        Blocks until the pipeline processes all inputs represented by the completion token.
+        """
+
+        self.client.wait_for_token(self.name, token)
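
Completion tokens and support bundles from the final hunk, sketched with hypothetical table and connector names:

    from feldera.enums import CompletionTokenStatus

    token = pipeline.generate_completion_token("events", "kafka_events")
    pipeline.wait_for_token(token)
    assert pipeline.completion_token_status(token) == CompletionTokenStatus.COMPLETE

    # Collect diagnostics into a ZIP, skipping the heap profile.
    bundle_bytes = pipeline.support_bundle("bundle.zip", heap_profile=False)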