feldera 0.96.0__py3-none-any.whl → 0.99.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- feldera/enums.py +121 -104
- feldera/pipeline.py +154 -133
- feldera/pipeline_builder.py +4 -1
- feldera/rest/feldera_client.py +58 -37
- feldera/rest/pipeline.py +1 -0
- feldera/stats.py +149 -0
- {feldera-0.96.0.dist-info → feldera-0.99.0.dist-info}/METADATA +24 -6
- {feldera-0.96.0.dist-info → feldera-0.99.0.dist-info}/RECORD +10 -9
- {feldera-0.96.0.dist-info → feldera-0.99.0.dist-info}/WHEEL +0 -0
- {feldera-0.96.0.dist-info → feldera-0.99.0.dist-info}/top_level.txt +0 -0
feldera/pipeline.py
CHANGED
@@ -10,6 +10,7 @@ from queue import Queue
 
 from feldera.rest.errors import FelderaAPIError
 from feldera.enums import PipelineStatus, ProgramStatus, CheckpointStatus
+from feldera.enums import StorageStatus
 from feldera.rest.pipeline import Pipeline as InnerPipeline
 from feldera.rest.feldera_client import FelderaClient
 from feldera._callback_runner import _CallbackRunnerInstruction, CallbackRunner
@@ -17,6 +18,7 @@ from feldera.output_handler import OutputHandler
 from feldera._helpers import ensure_dataframe_has_columns, chunk_dataframe
 from feldera.rest.sql_table import SQLTable
 from feldera.rest.sql_view import SQLView
+from feldera.stats import PipelineStatistics
 
 
 class Pipeline:
@@ -69,6 +71,11 @@ class Pipeline:
             else:
                 raise err
 
+    def stats(self) -> PipelineStatistics:
+        """Gets the pipeline metrics and performance counters."""
+
+        return PipelineStatistics.from_dict(self.client.get_pipeline_stats(self.name))
+
     def input_pandas(self, table_name: str, df: pandas.DataFrame, force: bool = False):
         """
         Push all rows in a pandas DataFrame to the pipeline.
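The new `Pipeline.stats()` helper (backed by the `PipelineStatistics` class added in `feldera/stats.py`) replaces raw dict access to the pipeline's metrics. A minimal sketch of how it might be called; the URL and pipeline name are illustrative, and the metric fields shown are the ones this diff itself relies on:

```python
from feldera import FelderaClient, Pipeline

client = FelderaClient("http://localhost:8080")  # assumed local Feldera instance
pipeline = Pipeline.get("example", client)       # "example" is an illustrative name

stats = pipeline.stats()
# Typed access instead of the raw dict returned by get_pipeline_stats().
print(stats.global_metrics.total_input_records)
print(stats.global_metrics.total_processed_records)
print(stats.global_metrics.pipeline_complete)
```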
@@ -209,7 +216,7 @@ class Pipeline:
         Follow the change stream (i.e., the output) of the provided view.
         Returns an output handler to read the changes.
 
-        When the pipeline is
+        When the pipeline is stopped, these listeners are dropped.
 
         You must call this method before starting the pipeline to get the entire output of the view.
         If this method is called once the pipeline has started, you will only get the output from that point onwards.
@@ -263,24 +270,26 @@ class Pipeline:
             handler.start()
 
     def wait_for_completion(
-        self,
+        self, force_stop: bool = False, timeout_s: Optional[float] = None
    ):
         """
         Block until the pipeline has completed processing all input records.
 
-        This method blocks until (1) all input connectors attached to the
-        have finished reading their input data sources and issued
-        notifications to the pipeline, and (2) all inputs received
-        connectors have been fully processed and corresponding
-        sent out through the output connectors.
+        This method blocks until (1) all input connectors attached to the
+        pipeline have finished reading their input data sources and issued
+        end-of-input notifications to the pipeline, and (2) all inputs received
+        from these connectors have been fully processed and corresponding
+        outputs have been sent out through the output connectors.
 
         This method will block indefinitely if at least one of the input
         connectors attached to the pipeline is a streaming connector, such as
         Kafka, that does not issue the end-of-input notification.
 
-        :param
-
-
+        :param force_stop: If True, the pipeline will be forcibly stopped after
+            completion. False by default. No checkpoints will be made.
+        :param timeout_s: Optional. The maximum time (in seconds) to wait for
+            the pipeline to complete. The default is None, which means wait
+            indefinitely.
 
         :raises RuntimeError: If the pipeline returns unknown metrics.
         """
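`wait_for_completion` now accepts `force_stop` and `timeout_s`. A short sketch of the 0.99-style call, assuming the pipeline's input connectors eventually issue end-of-input notifications (otherwise the call blocks until the timeout); the URL and pipeline name are illustrative:

```python
from feldera import FelderaClient, Pipeline

pipeline = Pipeline.get("example", FelderaClient("http://localhost:8080"))

# Start the pipeline, wait for all input to be processed, then force-stop it
# (no checkpoint is taken when force-stopping).
pipeline.start()
pipeline.wait_for_completion(force_stop=True, timeout_s=600)
```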
@@ -309,40 +318,18 @@ class Pipeline:
                     }s, timeout: {timeout_s}s"
                 )
 
-
-                "global_metrics"
-            )
-            pipeline_complete: bool = metrics.get("pipeline_complete")
-
+            pipeline_complete: bool = self.stats().global_metrics.pipeline_complete
             if pipeline_complete is None:
                 raise RuntimeError(
                     "received unknown metrics from the pipeline, pipeline_complete is None"
                 )
-
-            if pipeline_complete:
+            elif pipeline_complete:
                 break
 
             time.sleep(1)
 
-        if
-        self.
-
-    def __failed_check(self, next):
-        """
-        Checks if the pipeline is in FAILED state and raises an error if it is.
-        :meta private:
-        """
-        status = self.status()
-        if status == PipelineStatus.FAILED:
-            deployment_error = self.client.get_pipeline(self.name).deployment_error
-            error_msg = deployment_error.get("message", "")
-            raise RuntimeError(
-                f"""Cannot {next} pipeline '{self.name}' in FAILED state.
-The pipeline must be in SHUTDOWN state before it can be started, but it is currently in FAILED state.
-Use `Pipeline.shutdown()` method to shut down the pipeline.
-Error Message:
-{error_msg}"""
-            )
+        if force_stop:
+            self.stop(force=True)
 
     def start(self, timeout_s: Optional[float] = None):
         """
@@ -350,28 +337,27 @@ Error Message:
 
         Starts this pipeline.
 
-        The pipeline must be in
-        If the pipeline is in any other state, an error will be raised.
-        If the pipeline is in PAUSED state, use `.meth:resume` instead.
-        If the pipeline is in FAILED state, it must be shutdown before starting it again.
+        - The pipeline must be in STOPPED state to start.
+        - If the pipeline is in any other state, an error will be raised.
+        - If the pipeline is in PAUSED state, use `.meth:resume` instead.
 
-        :param timeout_s: The maximum time (in seconds) to wait for the
+        :param timeout_s: The maximum time (in seconds) to wait for the
+            pipeline to start.
 
-        :raises RuntimeError: If the pipeline is not in
+        :raises RuntimeError: If the pipeline is not in STOPPED state.
         """
 
-        self.__failed_check("start")
         status = self.status()
-        if status != PipelineStatus.
+        if status != PipelineStatus.STOPPED:
             raise RuntimeError(
-                f"""Cannot start pipeline '{self.name}' in state
-The pipeline must be in
-You can either
-resume a paused pipeline."""
+                f"""Cannot start pipeline '{self.name}' in state \
+'{str(status.name)}'. The pipeline must be in STOPPED state before it can be \
+started. You can either stop the pipeline using the `Pipeline.stop()` \
+method or use `Pipeline.resume()` to resume a paused pipeline."""
             )
 
         self.client.pause_pipeline(
-            self.name, "Unable to START the pipeline
+            self.name, "Unable to START the pipeline.\n", timeout_s
         )
         self.__setup_output_listeners()
         self.resume(timeout_s)
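With `__failed_check` removed, `start()` simply requires the pipeline to be in STOPPED state and points paused pipelines at `resume()`. A sketch of how a caller might branch on the status; the names and URL are illustrative:

```python
from feldera import FelderaClient, Pipeline
from feldera.enums import PipelineStatus

pipeline = Pipeline.get("example", FelderaClient("http://localhost:8080"))

status = pipeline.status()
if status == PipelineStatus.STOPPED:
    # Only a stopped pipeline can be started.
    pipeline.start(timeout_s=300)
elif status == PipelineStatus.PAUSED:
    # A paused pipeline is resumed, not started.
    pipeline.resume(timeout_s=300)
```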
@@ -380,12 +366,15 @@ resume a paused pipeline."""
         """
         Restarts the pipeline.
 
-        This method
+        This method forcibly **STOPS** the pipeline regardless of its current
+        state and then starts it again. No checkpoints are made when stopping
+        the pipeline.
 
-        :param timeout_s: The maximum time (in seconds) to wait for the
+        :param timeout_s: The maximum time (in seconds) to wait for the
+            pipeline to restart.
         """
 
-        self.
+        self.stop(force=True, timeout_s=timeout_s)
         self.start(timeout_s)
 
     def wait_for_idle(
@@ -436,18 +425,17 @@ resume a paused pipeline."""
             now_s = time.monotonic()
 
             # Metrics retrieval
-            metrics
-
-
-            total_input_records
-            total_processed_records: int | None = metrics.get("total_processed_records")
-            if total_input_records is None:
+            metrics = self.stats().global_metrics
+            total_input_records = metrics.total_input_records
+            total_processed_records = metrics.total_processed_records
+            if metrics.total_input_records is None:
                 raise RuntimeError(
                     "total_input_records is missing from the pipeline metrics"
                 )
-            if total_processed_records is None:
+            if metrics.total_processed_records is None:
                 raise RuntimeError(
-                    "total_processed_records is missing from the pipeline
+                    """total_processed_records is missing from the pipeline \
+metrics"""
                 )
 
             # Idle check
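`wait_for_idle` now pulls its counters from `stats().global_metrics`. The same fields are available to user code directly, as in this sketch (it assumes both counters are populated and uses illustrative connection details):

```python
from feldera import FelderaClient, Pipeline

pipeline = Pipeline.get("example", FelderaClient("http://localhost:8080"))

m = pipeline.stats().global_metrics
# Difference between ingested and processed records, i.e. the current backlog.
backlog = m.total_input_records - m.total_processed_records
print(f"records waiting to be processed: {backlog}")
```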
@@ -473,40 +461,26 @@ resume a paused pipeline."""
         """
         Pause the pipeline.
 
-        The pipeline can only transition to the PAUSED state from the RUNNING
-        If the pipeline is already paused, it will remain in the PAUSED
+        The pipeline can only transition to the PAUSED state from the RUNNING
+        state. If the pipeline is already paused, it will remain in the PAUSED
+        state.
 
-        :param timeout_s: The maximum time (in seconds) to wait for the
-
-        :raises FelderaAPIError: If the pipeline is in FAILED state.
+        :param timeout_s: The maximum time (in seconds) to wait for the
+            pipeline to pause.
         """
 
-        self.__failed_check("pause")
         self.client.pause_pipeline(self.name, timeout_s=timeout_s)
 
-    def
+    def stop(self, force: bool, timeout_s: Optional[float] = None):
         """
-
-
-        Shuts down the pipeline regardless of its current state.
-
-        :param timeout_s: The maximum time (in seconds) to wait for the pipeline to shut down.
-        """
-
-        if len(self.views_tx) > 0:
-            for _, queue in self.views_tx.pop().items():
-                # sends a message to the callback runner to stop listening
-                queue.put(_CallbackRunnerInstruction.RanToCompletion)
-                # block until the callback runner has been stopped
-                queue.join()
+        Stops the pipeline.
 
-
+        Stops the pipeline regardless of its current state.
 
-
-
-
-
-        :param timeout_s: The maximum time (in seconds) to wait for the pipeline to suspend.
+        :param force: Set True to immediately scale compute resources to zero.
+            Set False to automatically checkpoint before stopping.
+        :param timeout_s: The maximum time (in seconds) to wait for the
+            pipeline to stop.
         """
 
         if len(self.views_tx) > 0:
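The old shutdown/suspend paths are consolidated into `stop(force, timeout_s)`. A sketch of the two modes described in the docstring above; connection details are illustrative:

```python
from feldera import FelderaClient, Pipeline

pipeline = Pipeline.get("example", FelderaClient("http://localhost:8080"))

# Graceful stop: checkpoint first, then release compute resources.
pipeline.stop(force=False, timeout_s=300)

# Forced stop: no checkpoint, compute scaled to zero immediately.
# pipeline.stop(force=True)
```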
@@ -516,29 +490,35 @@ resume a paused pipeline."""
                 # block until the callback runner has been stopped
                 queue.join()
 
-        self.client.
+        self.client.stop_pipeline(self.name, force=force, timeout_s=timeout_s)
 
     def resume(self, timeout_s: Optional[float] = None):
         """
-        Resumes the pipeline from the PAUSED state. If the pipeline is already
-
-        :param timeout_s: The maximum time (in seconds) to wait for the pipeline to shut down.
+        Resumes the pipeline from the PAUSED state. If the pipeline is already
+        running, it will remain in the RUNNING state.
 
-        :
+        :param timeout_s: The maximum time (in seconds) to wait for the
+            pipeline to resume.
         """
 
-        self.__failed_check("resume")
         self.client.start_pipeline(self.name, timeout_s=timeout_s)
 
-    def delete(self):
+    def delete(self, clear_storage: bool = False):
         """
         Deletes the pipeline.
 
-        The pipeline must be
+        The pipeline must be stopped, and the storage cleared before it can be
+        deleted.
 
-        :
+        :param clear_storage: True if the storage should be cleared before
+            deletion. False by default
+
+        :raises FelderaAPIError: If the pipeline is not in STOPPED state or the
+            storage is still bound.
         """
 
+        if clear_storage:
+            self.clear_storage()
         self.client.delete_pipeline(self.name)
 
     @staticmethod
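`delete()` gains a `clear_storage` flag, and deletion now requires a stopped pipeline with cleared storage. A sketch of a full teardown under those rules; names and URL are illustrative:

```python
from feldera import FelderaClient, Pipeline

pipeline = Pipeline.get("example", FelderaClient("http://localhost:8080"))

pipeline.stop(force=True)            # stop without checkpointing
pipeline.delete(clear_storage=True)  # clears storage first, then deletes
```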
@@ -555,15 +535,18 @@ resume a paused pipeline."""
             return Pipeline._from_inner(inner, client)
         except FelderaAPIError as err:
             if err.status_code == 404:
-
+                err.message = f"Pipeline with name {name} not found"
+            raise err
 
     def checkpoint(self, wait: bool = False, timeout_s=300) -> int:
         """
         Checkpoints this pipeline, if fault-tolerance is enabled.
-        Fault Tolerance in Feldera:
+        Fault Tolerance in Feldera:
+        <https://docs.feldera.com/pipelines/fault-tolerance/>
 
         :param wait: If true, will block until the checkpoint completes.
-        :param timeout_s: The maximum time (in seconds) to wait for the
+        :param timeout_s: The maximum time (in seconds) to wait for the
+            checkpoint to complete.
 
         :raises FelderaAPIError: If checkpointing is not enabled.
         """
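A sketch of the checkpoint call documented above, assuming fault tolerance is enabled for the pipeline (otherwise the API raises `FelderaAPIError`); connection details are illustrative:

```python
from feldera import FelderaClient, Pipeline

pipeline = Pipeline.get("example", FelderaClient("http://localhost:8080"))

# Blocks for up to timeout_s seconds (300 by default) until the checkpoint completes.
seq = pipeline.checkpoint(wait=True)
print(f"checkpoint sequence number: {seq}")
```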
@@ -579,9 +562,8 @@ resume a paused pipeline."""
                 elapsed = time.monotonic() - start
                 if elapsed > timeout_s:
                     raise TimeoutError(
-                        f"timeout ({timeout_s}s) reached while waiting for
-
-                        }' to make checkpoint '{seq}'"
+                        f"""timeout ({timeout_s}s) reached while waiting for \
+pipeline '{self.name}' to make checkpoint '{seq}'"""
                     )
                 status = self.checkpoint_status(seq)
                 if status == CheckpointStatus.InProgress:
@@ -620,8 +602,10 @@ resume a paused pipeline."""
         """
         Syncs this checkpoint to object store.
 
-        :param wait: If true, will block until the checkpoint sync opeartion
-
+        :param wait: If true, will block until the checkpoint sync opeartion
+            completes.
+        :param timeout_s: The maximum time (in seconds) to wait for the
+            checkpoint to complete syncing.
 
         :raises FelderaAPIError: If no checkpoints have been made.
         """
@@ -637,9 +621,8 @@ resume a paused pipeline."""
                 elapsed = time.monotonic() - start
                 if elapsed > timeout_s:
                     raise TimeoutError(
-                        f"timeout ({timeout_s}s) reached while waiting for
-
-                        }' to sync checkpoint '{uuid}'"
+                        f"""timeout ({timeout_s}s) reached while waiting for \
+pipeline '{self.name}' to sync checkpoint '{uuid}'"""
                     )
                 status = self.sync_checkpoint_status(uuid)
                 if status in [CheckpointStatus.InProgress, CheckpointStatus.Unknown]:
@@ -675,20 +658,25 @@ resume a paused pipeline."""
 
     def query(self, query: str) -> Generator[Mapping[str, Any], None, None]:
         """
-        Executes an ad-hoc SQL query on this pipeline and returns a generator
-
-
+        Executes an ad-hoc SQL query on this pipeline and returns a generator
+        that yields the rows of the result as Python dictionaries. For
+        ``INSERT`` and ``DELETE`` queries, consider using :meth:`.execute`
+        instead. All floating-point numbers are deserialized as Decimal objects
+        to avoid precision loss.
 
         Note:
             You can only ``SELECT`` from materialized tables and views.
 
         Important:
-            This method is lazy. It returns a generator and is not evaluated
+            This method is lazy. It returns a generator and is not evaluated
+            until you consume the result.
 
         :param query: The SQL query to be executed.
-        :return: A generator that yields the rows of the result as Python
+        :return: A generator that yields the rows of the result as Python
+            dictionaries.
 
-        :raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
+        :raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
+            state.
         :raises FelderaAPIError: If querying a non materialized table or view.
         :raises FelderaAPIError: If the query is invalid.
         """
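A sketch of the lazy `query()` generator described above; the view name is illustrative and must be materialized, and the connection details are assumptions:

```python
from feldera import FelderaClient, Pipeline

pipeline = Pipeline.get("example", FelderaClient("http://localhost:8080"))

# Nothing is executed until the generator is consumed.
rows = pipeline.query("SELECT * FROM materialized_view LIMIT 10")
for row in rows:
    # Floating-point columns arrive as Decimal values to avoid precision loss.
    print(row)
```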
@@ -697,8 +685,9 @@ resume a paused pipeline."""
 
     def query_parquet(self, query: str, path: str):
         """
-        Executes an ad-hoc SQL query on this pipeline and saves the result to
-        If the extension isn't `parquet`,
+        Executes an ad-hoc SQL query on this pipeline and saves the result to
+        the specified path as a parquet file. If the extension isn't `parquet`,
+        it will be automatically appended to `path`.
 
         Note:
             You can only ``SELECT`` from materialized tables and views.
@@ -706,7 +695,8 @@ resume a paused pipeline."""
         :param query: The SQL query to be executed.
         :param path: The path of the parquet file.
 
-        :raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
+        :raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
+            state.
         :raises FelderaAPIError: If querying a non materialized table or view.
         :raises FelderaAPIError: If the query is invalid.
         """
@@ -715,18 +705,22 @@ resume a paused pipeline."""
 
     def query_tabular(self, query: str) -> Generator[str, None, None]:
         """
-        Executes a SQL query on this pipeline and returns the result as a
+        Executes a SQL query on this pipeline and returns the result as a
+        formatted string.
 
         Note:
             You can only ``SELECT`` from materialized tables and views.
 
         Important:
-            This method is lazy. It returns a generator and is not evaluated
+            This method is lazy. It returns a generator and is not evaluated
+            until you consume the result.
 
         :param query: The SQL query to be executed.
-        :return: A generator that yields a string representing the query result
+        :return: A generator that yields a string representing the query result
+            in a human-readable, tabular format.
 
-        :raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
+        :raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
+            state.
         :raises FelderaAPIError: If querying a non materialized table or view.
         :raises FelderaAPIError: If the query is invalid.
         """
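For completeness, a sketch of the other two ad-hoc query forms; the view name and output path are illustrative, and the connection details are assumptions:

```python
from feldera import FelderaClient, Pipeline

pipeline = Pipeline.get("example", FelderaClient("http://localhost:8080"))

# Write the result set to "out.parquet" (the extension is appended if missing).
pipeline.query_parquet("SELECT * FROM materialized_view", "out")

# Stream the result as human-readable text, chunk by chunk (lazy generator).
for chunk in pipeline.query_tabular("SELECT COUNT(*) FROM materialized_view"):
    print(chunk, end="")
```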
@@ -735,17 +729,19 @@ resume a paused pipeline."""
 
     def execute(self, query: str):
         """
-        Executes an ad-hoc SQL query on the current pipeline, discarding its
-        Unlike the :meth:`.query` method which returns a generator for
-        this method processes the query
+        Executes an ad-hoc SQL query on the current pipeline, discarding its
+        result. Unlike the :meth:`.query` method which returns a generator for
+        retrieving query results lazily, this method processes the query
+        eagerly and fully before returning.
 
-        This method is suitable for SQL operations like ``INSERT`` and
-
-
+        This method is suitable for SQL operations like ``INSERT`` and
+        ``DELETE``, where the user needs confirmation of successful query
+        execution, but does not require the query result. If the query fails,
+        an exception will be raised.
 
         Important:
-            If you try to ``INSERT`` or ``DELETE`` data from a table while the
-            it will block until the pipeline is resumed.
+            If you try to ``INSERT`` or ``DELETE`` data from a table while the
+            pipeline is paused, it will block until the pipeline is resumed.
 
         :param query: The SQL query to be executed.
 
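A sketch of `execute()` for statements where only success or failure matters; the table name and values are illustrative, and the connection details are assumptions:

```python
from feldera import FelderaClient, Pipeline

pipeline = Pipeline.get("example", FelderaClient("http://localhost:8080"))

# Runs eagerly; raises on failure, returns nothing on success.
pipeline.execute("INSERT INTO t VALUES (1, 'hello')")
```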
@@ -756,6 +752,16 @@ resume a paused pipeline."""
         gen = self.query_tabular(query)
         deque(gen, maxlen=0)
 
+    def clear_storage(self):
+        """
+        Clears the storage of the pipeline if it is currently in use.
+        This action cannot be canceled, and will delete all the pipeline
+        storage.
+        """
+
+        if self.storage_status() == StorageStatus.INUSE:
+            self.client.clear_storage(self.name)
+
     @property
     def name(self) -> str:
         """
@@ -772,12 +778,21 @@ resume a paused pipeline."""
         self.refresh()
         return self._inner.program_code
 
+    def storage_status(self) -> StorageStatus:
+        """
+        Return the storage status of the pipeline.
+        """
+
+        self.refresh()
+        return StorageStatus.from_str(self._inner.storage_status)
+
     def program_status(self) -> ProgramStatus:
         """
         Return the program status of the pipeline.
 
         Program status is the status of compilation of this SQL program.
-        We first compile the SQL program to Rust code, and then compile the
+        We first compile the SQL program to Rust code, and then compile the
+        Rust code to a binary.
         """
 
         self.refresh()
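A sketch tying the new storage helpers together; `StorageStatus` comes from `feldera.enums` (as imported at the top of this diff), and the pipeline name and URL are illustrative:

```python
from feldera import FelderaClient, Pipeline
from feldera.enums import StorageStatus

pipeline = Pipeline.get("example", FelderaClient("http://localhost:8080"))

if pipeline.storage_status() == StorageStatus.INUSE:
    pipeline.stop(force=True)
    pipeline.clear_storage()  # irreversible: deletes all pipeline storage
```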
@@ -881,7 +896,8 @@ resume a paused pipeline."""
 
     def deployment_status_since(self) -> datetime:
         """
-        Return the timestamp when the current deployment status of the pipeline
+        Return the timestamp when the current deployment status of the pipeline
+        was set.
         """
 
         self.refresh()
@@ -916,7 +932,8 @@ resume a paused pipeline."""
     def deployment_location(self) -> str:
         """
         Return the deployment location of the pipeline.
-        Deployment location is the location where the pipeline can be reached
+        Deployment location is the location where the pipeline can be reached
+        at runtime (a TCP port number or a URI).
         """
 
         self.refresh()
@@ -925,7 +942,8 @@ resume a paused pipeline."""
     def program_binary_url(self) -> str:
         """
         Return the program binary URL of the pipeline.
-        This is the URL where the compiled program binary can be downloaded
+        This is the URL where the compiled program binary can be downloaded
+        from.
         """
 
         self.refresh()
@@ -934,7 +952,9 @@ resume a paused pipeline."""
     def program_info(self) -> Mapping[str, Any]:
         """
         Return the program info of the pipeline.
-        This is the output returned by the SQL compiler, including: the list of
+        This is the output returned by the SQL compiler, including: the list of
+        input and output connectors, the generated Rust code for the pipeline,
+        and the SQL program schema.
         """
 
         self.refresh()
@@ -943,7 +963,8 @@ resume a paused pipeline."""
     def program_error(self) -> Mapping[str, Any]:
         """
         Return the program error of the pipeline.
-        If there are no errors, the `exit_code` field inside both
+        If there are no errors, the `exit_code` field inside both
+        `sql_compilation` and `rust_compilation` will be 0.
         """
 
         self.refresh()
feldera/pipeline_builder.py
CHANGED
@@ -83,7 +83,10 @@ class PipelineBuilder:
 
         try:
             # shutdown the pipeline if it exists and is running
-
+            p = Pipeline.get(self.name, self.client)
+            p.stop(force=True)
+            p.clear_storage()
+
         except FelderaAPIError:
             # pipeline doesn't exist, no worries
             pass
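After this change, the builder's create path (the hunk above appears to sit inside it) force-stops an existing pipeline of the same name and clears its storage before recreating it. A sketch of the builder flow, assuming the documented `PipelineBuilder(client, name=..., sql=...)` constructor and a local instance; the pipeline name and SQL are illustrative:

```python
from feldera import FelderaClient, PipelineBuilder

client = FelderaClient("http://localhost:8080")
sql = "CREATE TABLE t (x INT); CREATE MATERIALIZED VIEW v AS SELECT * FROM t;"

# Any existing pipeline named "example" is stopped (force=True) and its
# storage cleared before the new definition is created.
pipeline = PipelineBuilder(client, name="example", sql=sql).create()
pipeline.start()
```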