feldera 0.131.0__py3-none-any.whl → 0.189.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of feldera might be problematic. Click here for more details.

feldera/pipeline.py CHANGED
@@ -7,15 +7,28 @@ import pandas
7
7
  from uuid import UUID
8
8
 
9
9
  from typing import List, Dict, Callable, Optional, Generator, Mapping, Any
10
+ from threading import Event
10
11
  from collections import deque
11
- from queue import Queue
12
12
 
13
13
  from feldera.rest.errors import FelderaAPIError
14
- from feldera.enums import PipelineStatus, ProgramStatus, CheckpointStatus
15
- from feldera.enums import StorageStatus
14
+ from feldera.enums import (
15
+ BootstrapPolicy,
16
+ CompletionTokenStatus,
17
+ PipelineFieldSelector,
18
+ PipelineStatus,
19
+ ProgramStatus,
20
+ CheckpointStatus,
21
+ TransactionStatus,
22
+ StorageStatus,
23
+ DeploymentDesiredStatus,
24
+ DeploymentResourcesDesiredStatus,
25
+ DeploymentResourcesStatus,
26
+ DeploymentRuntimeDesiredStatus,
27
+ DeploymentRuntimeStatus,
28
+ )
16
29
  from feldera.rest.pipeline import Pipeline as InnerPipeline
17
30
  from feldera.rest.feldera_client import FelderaClient
18
- from feldera._callback_runner import _CallbackRunnerInstruction, CallbackRunner
31
+ from feldera._callback_runner import CallbackRunner
19
32
  from feldera.output_handler import OutputHandler
20
33
  from feldera._helpers import ensure_dataframe_has_columns, chunk_dataframe
21
34
  from feldera.rest.sql_table import SQLTable
@@ -28,7 +41,6 @@ class Pipeline:
28
41
  def __init__(self, client: FelderaClient):
29
42
  self.client: FelderaClient = client
30
43
  self._inner: InnerPipeline | None = None
31
- self.views_tx: List[Dict[str, Queue]] = []
32
44
 
33
45
  @staticmethod
34
46
  def _from_inner(inner: InnerPipeline, client: FelderaClient) -> "Pipeline":
@@ -36,28 +48,16 @@ class Pipeline:
36
48
  pipeline._inner = inner
37
49
  return pipeline
38
50
 
39
- def __setup_output_listeners(self):
40
- """
41
- Internal function used to set up the output listeners.
42
-
43
- :meta private:
44
- """
45
-
46
- for view_queue in self.views_tx:
47
- for view_name, queue in view_queue.items():
48
- # sends a message to the callback runner to start listening
49
- queue.put(_CallbackRunnerInstruction.PipelineStarted)
50
- # block until the callback runner is ready
51
- queue.join()
52
-
53
- def refresh(self):
51
+ def refresh(self, field_selector: PipelineFieldSelector):
54
52
  """
55
53
  Calls the backend to get the updated, latest version of the pipeline.
56
54
 
55
+ :param field_selector: Choose what pipeline information to refresh; see PipelineFieldSelector enum definition.
56
+
57
57
  :raises FelderaConnectionError: If there is an issue connecting to the backend.
58
58
  """
59
59
 
60
- self._inner = self.client.get_pipeline(self.name)
60
+ self._inner = self.client.get_pipeline(self.name, field_selector)
61
61
 
62
62
  def status(self) -> PipelineStatus:
63
63
  """
@@ -65,7 +65,7 @@ class Pipeline:
65
65
  """
66
66
 
67
67
  try:
68
- self.refresh()
68
+ self.refresh(PipelineFieldSelector.STATUS)
69
69
  return PipelineStatus.from_str(self._inner.deployment_status)
70
70
 
71
71
  except FelderaAPIError as err:
@@ -74,6 +74,30 @@ class Pipeline:
74
74
  else:
75
75
  raise err
76
76
 
77
+ def wait_for_status(
78
+ self, expected_status: PipelineStatus, timeout: Optional[int] = None
79
+ ) -> None:
80
+ """
81
+ Wait for the pipeline to reach the specified status.
82
+
83
+ :param expected_status: The status to wait for
84
+ :param timeout: Maximum time to wait in seconds. If None, waits forever (default: None)
85
+ :raises TimeoutError: If the expected status is not reached within the timeout
86
+ """
87
+ start_time = time.time()
88
+
89
+ while True:
90
+ current_status = self.status()
91
+ if current_status == expected_status:
92
+ return
93
+
94
+ if timeout is not None and time.time() - start_time >= timeout:
95
+ raise TimeoutError(
96
+ f"Pipeline did not reach {expected_status.name} status within {timeout} seconds"
97
+ )
98
+
99
+ time.sleep(1)
100
+
77
101
  def stats(self) -> PipelineStatistics:
78
102
  """Gets the pipeline metrics and performance counters."""
79
103
 
@@ -114,7 +138,7 @@ class Pipeline:
114
138
 
115
139
  ensure_dataframe_has_columns(df)
116
140
 
117
- pipeline = self.client.get_pipeline(self.name)
141
+ pipeline = self.client.get_pipeline(self.name, PipelineFieldSelector.ALL)
118
142
  if table_name.lower() != "now" and table_name.lower() not in [
119
143
  tbl.name.lower() for tbl in pipeline.tables
120
144
  ]:
@@ -224,23 +248,21 @@ class Pipeline:
224
248
  def listen(self, view_name: str) -> OutputHandler:
225
249
  """
226
250
  Follow the change stream (i.e., the output) of the provided view.
227
- Returns an output handler to read the changes.
251
+ Returns an output handle to read the changes.
228
252
 
229
- When the pipeline is stopped, these listeners are dropped.
253
+ When the pipeline is stopped, the handle is dropped.
230
254
 
231
- You must call this method before starting the pipeline to get the entire output of the view.
232
- If this method is called once the pipeline has started, you will only get the output from that point onwards.
255
+ The handle will only receive changes from the point in time when the listener is created.
256
+ In order to receive all changes since the pipeline started, you can create the pipeline in the `PAUSED` state
257
+ using :meth:`start_paused`, attach listeners and unpause the pipeline using :meth:`resume`.
233
258
 
234
259
  :param view_name: The name of the view to listen to.
235
260
  """
236
261
 
237
- queue: Optional[Queue] = None
238
-
239
262
  if self.status() not in [PipelineStatus.PAUSED, PipelineStatus.RUNNING]:
240
- queue = Queue(maxsize=1)
241
- self.views_tx.append({view_name: queue})
263
+ raise RuntimeError("Pipeline must be running or paused to listen to output")
242
264
 
243
- handler = OutputHandler(self.client, self.name, view_name, queue)
265
+ handler = OutputHandler(self.client, self.name, view_name)
244
266
  handler.start()
245
267
 
246
268
  return handler
@@ -251,8 +273,9 @@ class Pipeline:
251
273
  """
252
274
  Run the given callback on each chunk of the output of the specified view.
253
275
 
254
- You must call this method before starting the pipeline to operate on the entire output.
255
- You can call this method after the pipeline has started, but you will only get the output from that point onwards.
276
+ The callback will only receive changes from the point in time when the listener is created.
277
+ In order to receive all changes since the pipeline started, you can create the pipeline in the `PAUSED` state
278
+ using :meth:`start_paused`, attach listeners and unpause the pipeline using :meth:`resume`.
256
279
 
257
280
  :param view_name: The name of the view.
258
281
  :param callback: The callback to run on each chunk. The callback should take two arguments:
@@ -270,17 +293,18 @@ class Pipeline:
270
293
 
271
294
  """
272
295
 
273
- queue: Optional[Queue] = None
274
-
275
296
  if self.status() not in [PipelineStatus.RUNNING, PipelineStatus.PAUSED]:
276
- queue = Queue(maxsize=1)
277
- self.views_tx.append({view_name: queue})
297
+ raise RuntimeError("Pipeline must be running or paused to listen to output")
278
298
 
279
- handler = CallbackRunner(self.client, self.name, view_name, callback, queue)
299
+ event = Event()
300
+ handler = CallbackRunner(
301
+ self.client, self.name, view_name, callback, lambda exception: None, event
302
+ )
280
303
  handler.start()
304
+ event.wait()
281
305
 
282
306
  def wait_for_completion(
283
- self, force_stop: bool = False, timeout_s: Optional[float] = None
307
+ self, force_stop: bool = False, timeout_s: float | None = None
284
308
  ):
285
309
  """
286
310
  Block until the pipeline has completed processing all input records.
@@ -308,6 +332,7 @@ class Pipeline:
308
332
  PipelineStatus.RUNNING,
309
333
  PipelineStatus.INITIALIZING,
310
334
  PipelineStatus.PROVISIONING,
335
+ PipelineStatus.BOOTSTRAPPING,
311
336
  ]:
312
337
  raise RuntimeError("Pipeline must be running to wait for completion")
313
338
 
@@ -326,7 +351,7 @@ class Pipeline:
326
351
  f" time {elapsed}s, timeout: {timeout_s}s"
327
352
  )
328
353
 
329
- pipeline_complete: bool = self.stats().global_metrics.pipeline_complete
354
+ pipeline_complete: bool = self.is_complete()
330
355
  if pipeline_complete is None:
331
356
  raise RuntimeError(
332
357
  "received unknown metrics from the pipeline, pipeline_complete is None"
@@ -339,67 +364,23 @@ class Pipeline:
339
364
  if force_stop:
340
365
  self.stop(force=True)
341
366
 
342
- def start(self, wait: bool = True, timeout_s: Optional[float] = None):
367
+ def is_complete(self) -> bool:
343
368
  """
344
- .. _start:
345
-
346
- Starts this pipeline.
347
-
348
- - The pipeline must be in STOPPED state to start.
349
- - If the pipeline is in any other state, an error will be raised.
350
- - If the pipeline is in PAUSED state, use `.meth:resume` instead.
369
+ Check if the pipeline has completed processing all input records.
351
370
 
352
- :param timeout_s: The maximum time (in seconds) to wait for the
353
- pipeline to start.
354
- :param wait: Set True to wait for the pipeline to start. True by default
355
-
356
- :raises RuntimeError: If the pipeline is not in STOPPED state.
357
- """
358
-
359
- status = self.status()
360
- if status != PipelineStatus.STOPPED:
361
- raise RuntimeError(
362
- f"""Cannot start pipeline '{self.name}' in state \
363
- '{str(status.name)}'. The pipeline must be in STOPPED state before it can be \
364
- started. You can either stop the pipeline using the `Pipeline.stop()` \
365
- method or use `Pipeline.resume()` to resume a paused pipeline."""
366
- )
367
-
368
- if not wait:
369
- if len(self.views_tx) > 0:
370
- raise ValueError(
371
- "cannot start with 'wait=False' when output listeners are configured. Try setting 'wait=True'."
372
- )
373
-
374
- self.client.start_pipeline(self.name, wait=wait)
375
-
376
- return
377
-
378
- self.client.pause_pipeline(
379
- self.name, "Unable to START the pipeline.\n", wait=wait, timeout_s=timeout_s
380
- )
381
- self.__setup_output_listeners()
382
- self.resume(timeout_s=timeout_s)
383
-
384
- def restart(self, timeout_s: Optional[float] = None):
385
- """
386
- Restarts the pipeline.
387
-
388
- This method forcibly **STOPS** the pipeline regardless of its current
389
- state and then starts it again. No checkpoints are made when stopping
390
- the pipeline.
391
-
392
- :param timeout_s: The maximum time (in seconds) to wait for the
393
- pipeline to restart.
371
+ Returns True if (1) all input connectors attached to the
372
+ pipeline have finished reading their input data sources and issued
373
+ end-of-input notifications to the pipeline, and (2) all inputs received
374
+ from these connectors have been fully processed and corresponding
375
+ outputs have been sent out through the output connectors.
394
376
  """
395
377
 
396
- self.stop(force=True, timeout_s=timeout_s)
397
- self.start(timeout_s=timeout_s)
378
+ return self.stats().global_metrics.pipeline_complete
398
379
 
399
380
  def wait_for_idle(
400
381
  self,
401
382
  idle_interval_s: float = 5.0,
402
- timeout_s: float = 600.0,
383
+ timeout_s: float | None = None,
403
384
  poll_interval_s: float = 0.2,
404
385
  ):
405
386
  """
@@ -419,12 +400,12 @@ method or use `Pipeline.resume()` to resume a paused pipeline."""
419
400
  :raises RuntimeError: If the metrics are missing or the timeout was
420
401
  reached.
421
402
  """
422
- if idle_interval_s > timeout_s:
403
+ if timeout_s is not None and idle_interval_s > timeout_s:
423
404
  raise ValueError(
424
405
  f"idle interval ({idle_interval_s}s) cannot be larger than"
425
406
  f" timeout ({timeout_s}s)"
426
407
  )
427
- if poll_interval_s > timeout_s:
408
+ if timeout_s is not None and poll_interval_s > timeout_s:
428
409
  raise ValueError(
429
410
  f"poll interval ({poll_interval_s}s) cannot be larger than"
430
411
  f" timeout ({timeout_s}s)"
@@ -470,11 +451,13 @@ metrics"""
470
451
  return
471
452
 
472
453
  # Timeout
473
- if now_s - start_time_s >= timeout_s:
454
+ if timeout_s is not None and now_s - start_time_s >= timeout_s:
474
455
  raise RuntimeError(f"waiting for idle reached timeout ({timeout_s}s)")
475
456
  time.sleep(poll_interval_s)
476
457
 
477
- def activate(self, wait: bool = True, timeout_s: Optional[float] = None):
458
+ def activate(
459
+ self, wait: bool = True, timeout_s: Optional[float] = None
460
+ ) -> Optional[PipelineStatus]:
478
461
  """
479
462
  Activates the pipeline when starting from STANDBY mode. Only applicable
480
463
  when the pipeline is starting from a checkpoint in object store.
@@ -485,7 +468,82 @@ metrics"""
485
468
  pipeline to pause.
486
469
  """
487
470
 
488
- self.client.activate_pipeline(self.name, wait=wait, timeout_s=timeout_s)
471
+ return self.client.activate_pipeline(self.name, wait=wait, timeout_s=timeout_s)
472
+
473
+ def start(
474
+ self,
475
+ bootstrap_policy: Optional[BootstrapPolicy] = None,
476
+ wait: bool = True,
477
+ timeout_s: Optional[float] = None,
478
+ ):
479
+ """
480
+ .. _start:
481
+
482
+ Starts this pipeline.
483
+
484
+ - The pipeline must be in STOPPED state to start.
485
+ - If the pipeline is in any other state, an error will be raised.
486
+ - If the pipeline is in PAUSED state, use :meth:`resume` instead.
487
+
488
+ :param bootstrap_policy: The bootstrap policy to use.
489
+ :param timeout_s: The maximum time (in seconds) to wait for the
490
+ pipeline to start.
491
+ :param wait: Set True to wait for the pipeline to start. True by default
492
+
493
+ :raises RuntimeError: If the pipeline is not in STOPPED state.
494
+ """
495
+
496
+ self.client.start_pipeline(
497
+ self.name, bootstrap_policy=bootstrap_policy, wait=wait, timeout_s=timeout_s
498
+ )
499
+
500
+ def start_paused(
501
+ self,
502
+ bootstrap_policy: Optional[BootstrapPolicy] = None,
503
+ wait: bool = True,
504
+ timeout_s: Optional[float] = None,
505
+ ):
506
+ """
507
+ Starts the pipeline in the paused state.
508
+ """
509
+
510
+ return self.client.start_pipeline_as_paused(
511
+ self.name, bootstrap_policy=bootstrap_policy, wait=wait, timeout_s=timeout_s
512
+ )
513
+
514
+ def start_standby(
515
+ self,
516
+ bootstrap_policy: Optional[BootstrapPolicy] = None,
517
+ wait: bool = True,
518
+ timeout_s: Optional[float] = None,
519
+ ):
520
+ """
521
+ Starts the pipeline in the standby state.
522
+ """
523
+
524
+ self.client.start_pipeline_as_standby(
525
+ self.name, bootstrap_policy=bootstrap_policy, wait=wait, timeout_s=timeout_s
526
+ )
527
+
528
+ def restart(
529
+ self,
530
+ bootstrap_policy: Optional[BootstrapPolicy] = None,
531
+ timeout_s: Optional[float] = None,
532
+ ):
533
+ """
534
+ Restarts the pipeline.
535
+
536
+ This method forcibly **STOPS** the pipeline regardless of its current
537
+ state and then starts it again. No checkpoints are made when stopping
538
+ the pipeline.
539
+
540
+ :param bootstrap_policy: The bootstrap policy to use.
541
+ :param timeout_s: The maximum time (in seconds) to wait for the
542
+ pipeline to restart.
543
+ """
544
+
545
+ self.stop(force=True, timeout_s=timeout_s)
546
+ self.start(bootstrap_policy=bootstrap_policy, timeout_s=timeout_s)
489
547
 
490
548
  def pause(self, wait: bool = True, timeout_s: Optional[float] = None):
491
549
  """
@@ -516,24 +574,22 @@ metrics"""
516
574
  pipeline to stop.
517
575
  """
518
576
 
519
- if wait:
520
- for view_queue in self.views_tx:
521
- for _, queue in view_queue.items():
522
- # sends a message to the callback runner to stop listening
523
- queue.put(_CallbackRunnerInstruction.RanToCompletion)
524
-
525
- if len(self.views_tx) > 0:
526
- while self.views_tx:
527
- view = self.views_tx.pop()
528
- for view_name, queue in view.items():
529
- # block until the callback runner has been stopped
530
- queue.join()
531
-
532
- time.sleep(3)
533
577
  self.client.stop_pipeline(
534
578
  self.name, force=force, wait=wait, timeout_s=timeout_s
535
579
  )
536
580
 
581
+ def approve(self):
582
+ """
583
+ Approves the pipeline to proceed with bootstrapping.
584
+
585
+ This method is used when a pipeline has been started with
586
+ `bootstrap_policy=BootstrapPolicy.AWAIT_APPROVAL` and is currently in the
587
+ AWAITINGAPPROVAL state. The pipeline will wait for explicit user approval
588
+ before proceeding with the bootstrapping process.
589
+ """
590
+
591
+ self.client.approve_pipeline(self.name)
592
+
537
593
  def resume(self, wait: bool = True, timeout_s: Optional[float] = None):
538
594
  """
539
595
  Resumes the pipeline from the PAUSED state. If the pipeline is already
@@ -544,17 +600,19 @@ metrics"""
544
600
  pipeline to resume.
545
601
  """
546
602
 
547
- self.client.start_pipeline(self.name, wait=wait, timeout_s=timeout_s)
603
+ self.client.resume_pipeline(self.name, wait=wait, timeout_s=timeout_s)
548
604
 
549
- def start_transaction(self):
605
+ def start_transaction(self) -> int:
550
606
  """
551
607
  Start a new transaction.
552
608
 
553
- Returns:
554
- Transaction ID.
609
+ :return: Transaction ID.
610
+
611
+ :raises FelderaAPIError: If the pipeline fails to start a transaction, e.g., if the pipeline is not running or
612
+ there is already an active transaction.
555
613
  """
556
614
 
557
- self.client.start_transaction(self.name)
615
+ return self.client.start_transaction(self.name)
558
616
 
559
617
  def commit_transaction(
560
618
  self,
@@ -563,7 +621,7 @@ metrics"""
563
621
  timeout_s: Optional[float] = None,
564
622
  ):
565
623
  """
566
- Commits the currently active transaction.
624
+ Commit the currently active transaction.
567
625
 
568
626
  :param transaction_id: If provided, the function verifies that the currently active transaction matches this ID.
569
627
  If the active transaction ID does not match, the function raises an error.
@@ -577,11 +635,36 @@ metrics"""
577
635
  :raises RuntimeError: If there is currently no transaction in progress.
578
636
  :raises ValueError: If the provided `transaction_id` does not match the current transaction.
579
637
  :raises TimeoutError: If the transaction does not commit within the specified timeout (when `wait` is True).
580
- :raises FelderaAPIError: If the pipeline fails to start a transaction.
638
+ :raises FelderaAPIError: If the pipeline fails to commit a transaction.
581
639
  """
582
640
 
583
641
  self.client.commit_transaction(self.name, transaction_id, wait, timeout_s)
584
642
 
643
+ def transaction_status(self) -> TransactionStatus:
644
+ """
645
+ Get pipeline's transaction handling status.
646
+
647
+ :return: Current transaction handling status of the pipeline.
648
+
649
+ :raises FelderaAPIError: If pipeline's status couldn't be read, e.g., because the pipeline is not currently running.
650
+ """
651
+
652
+ return self.stats().global_metrics.transaction_status
653
+
654
+ def transaction_id(self) -> Optional[int]:
655
+ """
656
+ Gets the ID of the currently active transaction or None if there is no active transaction.
657
+
658
+ :return: The ID of the transaction.
659
+ """
660
+
661
+ transaction_id = self.stats().global_metrics.transaction_id
662
+
663
+ if transaction_id == 0:
664
+ return None
665
+ else:
666
+ return transaction_id
667
+
585
668
  def delete(self, clear_storage: bool = False):
586
669
  """
587
670
  Deletes the pipeline.
@@ -610,14 +693,25 @@ metrics"""
610
693
  """
611
694
 
612
695
  try:
613
- inner = client.get_pipeline(name)
696
+ inner = client.get_pipeline(name, PipelineFieldSelector.ALL)
614
697
  return Pipeline._from_inner(inner, client)
615
698
  except FelderaAPIError as err:
616
699
  if err.status_code == 404:
617
700
  err.message = f"Pipeline with name {name} not found"
618
701
  raise err
619
702
 
620
- def checkpoint(self, wait: bool = False, timeout_s=300) -> int:
703
+ @staticmethod
704
+ def all(client: FelderaClient) -> List["Pipeline"]:
705
+ """
706
+ Get all pipelines.
707
+
708
+ :param client: The FelderaClient instance.
709
+ :return: A list of Pipeline objects.
710
+ """
711
+
712
+ return [Pipeline._from_inner(p, client) for p in client.pipelines()]
713
+
714
+ def checkpoint(self, wait: bool = False, timeout_s: Optional[float] = None) -> int:
621
715
  """
622
716
  Checkpoints this pipeline.
623
717
 
@@ -625,6 +719,8 @@ metrics"""
625
719
  :param timeout_s: The maximum time (in seconds) to wait for the
626
720
  checkpoint to complete.
627
721
 
722
+ :return: The checkpoint sequence number.
723
+
628
724
  :raises FelderaAPIError: If enterprise features are not enabled.
629
725
  """
630
726
 
@@ -637,7 +733,7 @@ metrics"""
637
733
 
638
734
  while True:
639
735
  elapsed = time.monotonic() - start
640
- if elapsed > timeout_s:
736
+ if timeout_s is not None and elapsed > timeout_s:
641
737
  raise TimeoutError(
642
738
  f"""timeout ({timeout_s}s) reached while waiting for \
643
739
  pipeline '{self.name}' to make checkpoint '{seq}'"""
@@ -647,9 +743,7 @@ pipeline '{self.name}' to make checkpoint '{seq}'"""
647
743
  time.sleep(0.1)
648
744
  continue
649
745
 
650
- return status
651
-
652
- return seq
746
+ return seq
653
747
 
654
748
  def checkpoint_status(self, seq: int) -> CheckpointStatus:
655
749
  """
@@ -675,7 +769,9 @@ pipeline '{self.name}' to make checkpoint '{seq}'"""
675
769
  if seq < success:
676
770
  return CheckpointStatus.Unknown
677
771
 
678
- def sync_checkpoint(self, wait: bool = False, timeout_s=300) -> str:
772
+ def sync_checkpoint(
773
+ self, wait: bool = False, timeout_s: Optional[float] = None
774
+ ) -> str:
679
775
  """
680
776
  Syncs this checkpoint to object store.
681
777
 
@@ -685,6 +781,7 @@ pipeline '{self.name}' to make checkpoint '{seq}'"""
685
781
  checkpoint to complete syncing.
686
782
 
687
783
  :raises FelderaAPIError: If no checkpoints have been made.
784
+ :raises RuntimeError: If syncing the checkpoint fails.
688
785
  """
689
786
 
690
787
  uuid = self.client.sync_checkpoint(self.name)
@@ -696,12 +793,17 @@ pipeline '{self.name}' to make checkpoint '{seq}'"""
696
793
 
697
794
  while True:
698
795
  elapsed = time.monotonic() - start
699
- if elapsed > timeout_s:
796
+ if timeout_s is not None and elapsed > timeout_s:
700
797
  raise TimeoutError(
701
798
  f"""timeout ({timeout_s}s) reached while waiting for \
702
799
  pipeline '{self.name}' to sync checkpoint '{uuid}'"""
703
800
  )
704
801
  status = self.sync_checkpoint_status(uuid)
802
+ if status == CheckpointStatus.Failure:
803
+ raise RuntimeError(
804
+ f"failed to sync checkpoint '{uuid}': ", status.get_error()
805
+ )
806
+
705
807
  if status in [CheckpointStatus.InProgress, CheckpointStatus.Unknown]:
706
808
  time.sleep(0.1)
707
809
  continue
@@ -716,6 +818,9 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
716
818
  If the checkpoint is currently being synchronized, returns
717
819
  `CheckpointStatus.Unknown`.
718
820
 
821
+ Failures are not raised as runtime errors and must be explicitly
822
+ checked.
823
+
719
824
  :param uuid: The checkpoint uuid.
720
825
  """
721
826
 
@@ -731,6 +836,7 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
731
836
  if uuid == fail.get("uuid"):
732
837
  failure = CheckpointStatus.Failure
733
838
  failure.error = fail.get("error", "")
839
+ logging.error(f"failed to sync checkpoint '{uuid}': {failure.error}")
734
840
  return failure
735
841
 
736
842
  if (success is None) or UUID(uuid) > UUID(success):
@@ -876,15 +982,76 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
876
982
  Return the program SQL code of the pipeline.
877
983
  """
878
984
 
879
- self.refresh()
985
+ self.refresh(PipelineFieldSelector.ALL)
880
986
  return self._inner.program_code
881
987
 
988
+ def modify(
989
+ self,
990
+ sql: Optional[str] = None,
991
+ udf_rust: Optional[str] = None,
992
+ udf_toml: Optional[str] = None,
993
+ program_config: Optional[Mapping[str, Any]] = None,
994
+ runtime_config: Optional[Mapping[str, Any]] = None,
995
+ description: Optional[str] = None,
996
+ ):
997
+ """
998
+ Modify the pipeline.
999
+
1000
+ Modify the values of pipeline attributes: SQL code, UDF Rust code,
1001
+ UDF Rust dependencies (TOML), program config, runtime config, and
1002
+ description. Only the provided attributes will be modified. Other
1003
+ attributes will remain unchanged.
1004
+
1005
+ The pipeline must be in the STOPPED state to be modified.
1006
+
1007
+ :raises FelderaAPIError: If the pipeline is not in a STOPPED state.
1008
+ """
1009
+
1010
+ self.client.patch_pipeline(
1011
+ name=self._inner.name,
1012
+ sql=sql,
1013
+ udf_rust=udf_rust,
1014
+ udf_toml=udf_toml,
1015
+ program_config=program_config,
1016
+ runtime_config=runtime_config,
1017
+ description=description,
1018
+ )
1019
+
1020
+ def update_runtime(self):
1021
+ """
1022
+ Recompile a pipeline with the Feldera runtime version included in the
1023
+ currently installed Feldera platform.
1024
+
1025
+ Use this endpoint after upgrading Feldera to rebuild pipelines that were
1026
+ compiled with older platform versions. In most cases, recompilation is not
1027
+ required—pipelines compiled with older versions will continue to run on the
1028
+ upgraded platform.
1029
+
1030
+ Situations where recompilation may be necessary:
1031
+ - To benefit from the latest bug fixes and performance optimizations.
1032
+ - When backward-incompatible changes are introduced in Feldera. In this case,
1033
+ attempting to start a pipeline compiled with an unsupported version will
1034
+ result in an error.
1035
+
1036
+ If the pipeline is already compiled with the current platform version,
1037
+ this operation is a no-op.
1038
+
1039
+ Note that recompiling the pipeline with a new platform version may change its
1040
+ query plan. If the modified pipeline is started from an existing checkpoint,
1041
+ it may require bootstrapping parts of its state from scratch. See Feldera
1042
+ documentation for details on the bootstrapping process.
1043
+
1044
+ :raises FelderaAPIError: If the pipeline is not in a STOPPED state.
1045
+ """
1046
+
1047
+ self.client.update_pipeline_runtime(self._inner.name)
1048
+
882
1049
  def storage_status(self) -> StorageStatus:
883
1050
  """
884
1051
  Return the storage status of the pipeline.
885
1052
  """
886
1053
 
887
- self.refresh()
1054
+ self.refresh(PipelineFieldSelector.STATUS)
888
1055
  return StorageStatus.from_str(self._inner.storage_status)
889
1056
 
890
1057
  def program_status(self) -> ProgramStatus:
@@ -896,23 +1063,43 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
896
1063
  Rust code to a binary.
897
1064
  """
898
1065
 
899
- self.refresh()
1066
+ self.refresh(PipelineFieldSelector.STATUS)
900
1067
  return ProgramStatus.from_value(self._inner.program_status)
901
1068
 
1069
+ def testing_force_update_platform_version(self, platform_version: str):
1070
+ """
1071
+ Used to simulate a pipeline compiled with a different platform version than the one currently in use.
1072
+ This is useful for testing platform upgrade behavior without actually upgrading Feldera.
1073
+
1074
+ This method is only available when Feldera runs with the "testing" unstable feature enabled.
1075
+ """
1076
+
1077
+ self.client.testing_force_update_platform_version(
1078
+ name=self._inner.name, platform_version=platform_version
1079
+ )
1080
+
902
1081
  def program_status_since(self) -> datetime:
903
1082
  """
904
1083
  Return the timestamp when the current program status was set.
905
1084
  """
906
1085
 
907
- self.refresh()
1086
+ self.refresh(PipelineFieldSelector.STATUS)
908
1087
  return datetime.fromisoformat(self._inner.program_status_since)
909
1088
 
1089
+ def platform_version(self) -> str:
1090
+ """
1091
+ Return the Feldera platform with which the program was compiled.
1092
+ """
1093
+
1094
+ self.refresh(PipelineFieldSelector.STATUS)
1095
+ return self._inner.platform_version
1096
+
910
1097
  def udf_rust(self) -> str:
911
1098
  """
912
1099
  Return the Rust code for UDFs.
913
1100
  """
914
1101
 
915
- self.refresh()
1102
+ self.refresh(PipelineFieldSelector.ALL)
916
1103
  return self._inner.udf_rust
917
1104
 
918
1105
  def udf_toml(self) -> str:
@@ -920,7 +1107,7 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
920
1107
  Return the Rust dependencies required by UDFs (in the TOML format).
921
1108
  """
922
1109
 
923
- self.refresh()
1110
+ self.refresh(PipelineFieldSelector.ALL)
924
1111
  return self._inner.udf_toml
925
1112
 
926
1113
  def program_config(self) -> Mapping[str, Any]:
@@ -928,7 +1115,7 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
928
1115
  Return the program config of the pipeline.
929
1116
  """
930
1117
 
931
- self.refresh()
1118
+ self.refresh(PipelineFieldSelector.ALL)
932
1119
  return self._inner.program_config
933
1120
 
934
1121
  def runtime_config(self) -> RuntimeConfig:
@@ -936,7 +1123,7 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
936
1123
  Return the runtime config of the pipeline.
937
1124
  """
938
1125
 
939
- self.refresh()
1126
+ self.refresh(PipelineFieldSelector.ALL)
940
1127
  return RuntimeConfig.from_dict(self._inner.runtime_config)
941
1128
 
942
1129
  def set_runtime_config(self, runtime_config: RuntimeConfig):
@@ -961,7 +1148,7 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
961
1148
  Return the ID of the pipeline.
962
1149
  """
963
1150
 
964
- self.refresh()
1151
+ self.refresh(PipelineFieldSelector.STATUS)
965
1152
  return self._inner.id
966
1153
 
967
1154
  def description(self) -> str:
@@ -969,7 +1156,7 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
969
1156
  Return the description of the pipeline.
970
1157
  """
971
1158
 
972
- self.refresh()
1159
+ self.refresh(PipelineFieldSelector.STATUS)
973
1160
  return self._inner.description
974
1161
 
975
1162
  def tables(self) -> List[SQLTable]:
@@ -977,7 +1164,7 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
977
1164
  Return the tables of the pipeline.
978
1165
  """
979
1166
 
980
- self.refresh()
1167
+ self.refresh(PipelineFieldSelector.ALL)
981
1168
  return self._inner.tables
982
1169
 
983
1170
  def views(self) -> List[SQLView]:
@@ -985,7 +1172,7 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
985
1172
  Return the views of the pipeline.
986
1173
  """
987
1174
 
988
- self.refresh()
1175
+ self.refresh(PipelineFieldSelector.ALL)
989
1176
  return self._inner.views
990
1177
 
991
1178
  def created_at(self) -> datetime:
@@ -993,7 +1180,7 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
993
1180
  Return the creation time of the pipeline.
994
1181
  """
995
1182
 
996
- self.refresh()
1183
+ self.refresh(PipelineFieldSelector.STATUS)
997
1184
  return datetime.fromisoformat(self._inner.created_at)
998
1185
 
999
1186
  def version(self) -> int:
@@ -1001,7 +1188,7 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
1001
1188
  Return the version of the pipeline.
1002
1189
  """
1003
1190
 
1004
- self.refresh()
1191
+ self.refresh(PipelineFieldSelector.STATUS)
1005
1192
  return self._inner.version
1006
1193
 
1007
1194
  def program_version(self) -> int:
@@ -1009,7 +1196,7 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
1009
1196
  Return the program version of the pipeline.
1010
1197
  """
1011
1198
 
1012
- self.refresh()
1199
+ self.refresh(PipelineFieldSelector.STATUS)
1013
1200
  return self._inner.program_version
1014
1201
 
1015
1202
  def deployment_status_since(self) -> datetime:
@@ -1018,7 +1205,7 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
1018
1205
  was set.
1019
1206
  """
1020
1207
 
1021
- self.refresh()
1208
+ self.refresh(PipelineFieldSelector.STATUS)
1022
1209
  return datetime.fromisoformat(self._inner.deployment_status_since)
1023
1210
 
1024
1211
  def deployment_config(self) -> Mapping[str, Any]:
@@ -1026,17 +1213,63 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
1026
1213
  Return the deployment config of the pipeline.
1027
1214
  """
1028
1215
 
1029
- self.refresh()
1216
+ self.refresh(PipelineFieldSelector.ALL)
1030
1217
  return self._inner.deployment_config
1031
1218
 
1032
- def deployment_desired_status(self) -> PipelineStatus:
1219
+ def deployment_desired_status(self) -> DeploymentDesiredStatus:
1033
1220
  """
1034
1221
  Return the desired deployment status of the pipeline.
1035
1222
  This is the next state that the pipeline should transition to.
1036
1223
  """
1037
1224
 
1038
- self.refresh()
1039
- return PipelineStatus.from_str(self._inner.deployment_desired_status)
1225
+ self.refresh(PipelineFieldSelector.STATUS)
1226
+ return DeploymentDesiredStatus.from_str(self._inner.deployment_desired_status)
1227
+
1228
+ def deployment_resources_desired_status(self) -> DeploymentResourcesDesiredStatus:
1229
+ """
1230
+ Return the desired status of the deployment resources.
1231
+ """
1232
+
1233
+ self.refresh(PipelineFieldSelector.STATUS)
1234
+ return DeploymentResourcesDesiredStatus.from_str(
1235
+ self._inner.deployment_resources_desired_status
1236
+ )
1237
+
1238
+ def deployment_resources_status(self) -> DeploymentResourcesStatus:
1239
+ """
1240
+ Return the status of the deployment resources.
1241
+ """
1242
+
1243
+ self.refresh(PipelineFieldSelector.STATUS)
1244
+ return DeploymentResourcesStatus.from_str(
1245
+ self._inner.deployment_resources_status
1246
+ )
1247
+
1248
+ def deployment_runtime_desired_status(self) -> DeploymentRuntimeDesiredStatus:
1249
+ """
1250
+ Return the deployment runtime desired status.
1251
+ """
1252
+
1253
+ self.refresh(PipelineFieldSelector.STATUS)
1254
+ return DeploymentRuntimeDesiredStatus.from_str(
1255
+ self._inner.deployment_runtime_desired_status
1256
+ )
1257
+
1258
+ def deployment_runtime_status(self) -> DeploymentRuntimeStatus:
1259
+ """
1260
+ Return the deployment runtime status.
1261
+ """
1262
+
1263
+ self.refresh(PipelineFieldSelector.STATUS)
1264
+ return DeploymentRuntimeStatus.from_str(self._inner.deployment_runtime_status)
1265
+
1266
+ def deployment_runtime_status_details(self) -> Optional[dict]:
1267
+ """
1268
+ Return the deployment runtime status details.
1269
+ """
1270
+
1271
+ self.refresh(PipelineFieldSelector.STATUS)
1272
+ return self._inner.deployment_runtime_status_details
1040
1273
 
1041
1274
  def deployment_error(self) -> Mapping[str, Any]:
1042
1275
  """
@@ -1044,7 +1277,7 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
1044
1277
  Returns an empty string if there is no error.
1045
1278
  """
1046
1279
 
1047
- self.refresh()
1280
+ self.refresh(PipelineFieldSelector.STATUS)
1048
1281
  return self._inner.deployment_error
1049
1282
 
1050
1283
  def deployment_location(self) -> str:
@@ -1054,7 +1287,7 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
1054
1287
  at runtime (a TCP port number or a URI).
1055
1288
  """
1056
1289
 
1057
- self.refresh()
1290
+ self.refresh(PipelineFieldSelector.STATUS)
1058
1291
  return self._inner.deployment_location
1059
1292
 
1060
1293
  def program_info(self) -> Mapping[str, Any]:
@@ -1065,7 +1298,7 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
1065
1298
  and the SQL program schema.
1066
1299
  """
1067
1300
 
1068
- self.refresh()
1301
+ self.refresh(PipelineFieldSelector.ALL)
1069
1302
  return self._inner.program_info
1070
1303
 
1071
1304
  def program_error(self) -> Mapping[str, Any]:
@@ -1075,7 +1308,7 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
1075
1308
  `sql_compilation` and `rust_compilation` will be 0.
1076
1309
  """
1077
1310
 
1078
- self.refresh()
1311
+ self.refresh(PipelineFieldSelector.ALL)
1079
1312
  return self._inner.program_error
1080
1313
 
1081
1314
  def errors(self) -> List[Mapping[str, Any]]:
@@ -1159,3 +1392,31 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
1159
1392
  print(f"Support bundle written to {path}")
1160
1393
 
1161
1394
  return support_bundle_bytes
1395
+
1396
+ def generate_completion_token(self, table_name: str, connector_name: str) -> str:
1397
+ """
1398
+ Returns a completion token that can be passed to :meth:`.Pipeline.completion_token_status` to
1399
+ check whether the pipeline has finished processing all inputs received from the connector before
1400
+ the token was generated.
1401
+ """
1402
+
1403
+ return self.client.generate_completion_token(
1404
+ self.name, table_name, connector_name
1405
+ )
1406
+
1407
+ def completion_token_status(self, token: str) -> CompletionTokenStatus:
1408
+ """
1409
+ Returns the status of the completion token.
1410
+ """
1411
+
1412
+ if self.client.completion_token_processed(self.name, token):
1413
+ return CompletionTokenStatus.COMPLETE
1414
+ else:
1415
+ return CompletionTokenStatus.IN_PROGRESS
1416
+
1417
+ def wait_for_token(self, token: str):
1418
+ """
1419
+ Blocks until the pipeline processes all inputs represented by the completion token.
1420
+ """
1421
+
1422
+ self.client.wait_for_token(self.name, token)