feldera-0.131.0-py3-none-any.whl → feldera-0.192.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of feldera might be problematic. Click here for more details.

feldera/__init__.py CHANGED
@@ -1,9 +1,9 @@
1
1
  from feldera.rest.feldera_client import FelderaClient as FelderaClient
2
2
  from feldera.pipeline import Pipeline as Pipeline
3
3
  from feldera.pipeline_builder import PipelineBuilder as PipelineBuilder
4
- from feldera.rest._helpers import client_version
4
+ from feldera.rest._helpers import determine_client_version
5
5
 
6
- __version__ = client_version()
6
+ __version__ = determine_client_version()
7
7
 
8
8
  import pretty_errors
9
9
 
feldera/_callback_runner.py CHANGED
@@ -1,16 +1,13 @@
1
- from enum import Enum
2
- from threading import Thread
3
- from typing import Callable, Optional
4
- from queue import Queue, Empty
1
+ from threading import Thread, Event
2
+ from typing import Callable, List, Optional, Mapping, Any
5
3
 
6
4
  import pandas as pd
7
5
  from feldera import FelderaClient
8
6
  from feldera._helpers import dataframe_from_response
9
-
10
-
11
- class _CallbackRunnerInstruction(Enum):
12
- PipelineStarted = 1
13
- RanToCompletion = 2
7
+ from feldera.enums import PipelineFieldSelector
8
+ from feldera.rest.sql_table import SQLTable
9
+ from feldera.rest.sql_view import SQLView
10
+ from feldera.rest.pipeline import Pipeline
14
11
 
15
12
 
16
13
  class CallbackRunner(Thread):
@@ -20,16 +17,51 @@ class CallbackRunner(Thread):
20
17
  pipeline_name: str,
21
18
  view_name: str,
22
19
  callback: Callable[[pd.DataFrame, int], None],
23
- queue: Optional[Queue],
20
+ exception_callback: Callable[[BaseException], None],
21
+ event: Event,
24
22
  ):
23
+ """
24
+ :param client: The :class:`.FelderaClient` to use.
25
+ :param pipeline_name: The name of the current pipeline.
26
+ :param view_name: The name of the view we are listening to.
27
+ :param callback: The callback function to call on the data we receive.
28
+ :param exception_callback: The callback function to call when an exception occurs.
29
+ :param event: The event to wait for before starting the callback runner.
30
+ """
31
+
25
32
  super().__init__()
26
33
  self.daemon = True
27
34
  self.client: FelderaClient = client
28
35
  self.pipeline_name: str = pipeline_name
29
36
  self.view_name: str = view_name
30
37
  self.callback: Callable[[pd.DataFrame, int], None] = callback
31
- self.queue: Optional[Queue] = queue
32
- self.schema: Optional[dict] = None
38
+ self.exception_callback: Callable[[BaseException], None] = exception_callback
39
+ self.event: Event = event
40
+
41
+ self.pipeline: Pipeline = self.client.get_pipeline(
42
+ self.pipeline_name, PipelineFieldSelector.ALL
43
+ )
44
+
45
+ view_schema = None
46
+
47
+ schemas: List[SQLTable | SQLView] = self.pipeline.tables + self.pipeline.views
48
+ for schema in schemas:
49
+ if schema.name == self.view_name:
50
+ view_schema = schema
51
+ break
52
+
53
+ if view_schema is None:
54
+ raise ValueError(
55
+ f"Table or View {self.view_name} not found in the pipeline schema."
56
+ )
57
+
58
+ self.schema: SQLTable | SQLView = view_schema
59
+
60
+ def to_callback(self, chunk: Mapping[str, Any]):
61
+ data: Optional[list[Mapping[str, Any]]] = chunk.get("json_data")
62
+ seq_no: Optional[int] = chunk.get("sequence_number")
63
+ if data is not None and seq_no is not None:
64
+ self.callback(dataframe_from_response([data], self.schema.fields), seq_no)
33
65
 
34
66
  def run(self):
35
67
  """
@@ -38,82 +70,26 @@ class CallbackRunner(Thread):
38
70
  :meta private:
39
71
  """
40
72
 
41
- pipeline = self.client.get_pipeline(self.pipeline_name)
42
- schema = pipeline.program_info["schema"]
73
+ try:
74
+ gen_obj = self.client.listen_to_pipeline(
75
+ self.pipeline_name,
76
+ self.view_name,
77
+ format="json",
78
+ case_sensitive=self.schema.case_sensitive,
79
+ )
43
80
 
44
- if schema:
45
- schemas = [relation for relation in schema["inputs"] + schema["outputs"]]
46
- for schema in schemas:
47
- if schema["name"] == self.view_name:
48
- self.schema = schema
49
- break
81
+ iterator = gen_obj()
50
82
 
51
- if self.schema is None:
52
- raise ValueError(
53
- f"Table or View {self.view_name} not found in the pipeline schema."
54
- )
83
+ # Trigger the HTTP call
84
+ chunk = next(iterator)
85
+
86
+ # Unblock the main thread
87
+ self.event.set()
88
+
89
+ self.to_callback(chunk)
90
+
91
+ for chunk in iterator:
92
+ self.to_callback(chunk)
55
93
 
56
- # by default, we assume that the pipeline has been started
57
- ack = _CallbackRunnerInstruction.PipelineStarted
58
-
59
- # if there is Queue, we wait for the instruction to start the pipeline
60
- # this means that we are listening to the pipeline before running it, therefore, all data should be received
61
- if self.queue:
62
- ack = self.queue.get()
63
-
64
- match ack:
65
- # if the pipeline has actually been started, we start a listener
66
- case _CallbackRunnerInstruction.PipelineStarted:
67
- # listen to the pipeline
68
- gen_obj = self.client.listen_to_pipeline(
69
- self.pipeline_name, self.view_name, format="json"
70
- )
71
-
72
- # if there is a queue set up, inform the main thread that the listener has been started, and it can
73
- # proceed with starting the pipeline
74
- if self.queue:
75
- # stop blocking the main thread on `join` for the previous message
76
- self.queue.task_done()
77
-
78
- iterator = gen_obj()
79
-
80
- for chunk in iterator:
81
- chunk: dict = chunk
82
- data: Optional[list[dict]] = chunk.get("json_data")
83
- seq_no: Optional[int] = chunk.get("sequence_number")
84
- if data is not None and seq_no is not None:
85
- self.callback(
86
- dataframe_from_response([data], self.schema), seq_no
87
- )
88
-
89
- if self.queue:
90
- try:
91
- # if a non-blocking way, check if the queue has received further instructions
92
- # this should be a RanToCompletion instruction, which means that the pipeline has been
93
- # completed
94
- again_ack = self.queue.get_nowait()
95
-
96
- # if the queue has received a message
97
- if again_ack:
98
- match again_ack:
99
- case _CallbackRunnerInstruction.RanToCompletion:
100
- # stop blocking the main thread on `join` and return from this thread
101
- self.queue.task_done()
102
-
103
- return
104
-
105
- case _CallbackRunnerInstruction.PipelineStarted:
106
- # if the pipeline has been started again, which shouldn't happen,
107
- # ignore it and continue listening, call `task_done` to avoid blocking the main
108
- # thread on `join`
109
- self.queue.task_done()
110
-
111
- continue
112
- except Empty:
113
- # if the queue is empty, continue listening
114
- continue
115
-
116
- case _CallbackRunnerInstruction.RanToCompletion:
117
- if self.queue:
118
- self.queue.task_done()
119
- return
94
+ except BaseException as e:
95
+ self.exception_callback(e)
feldera/_helpers.py CHANGED
@@ -2,6 +2,7 @@ import uuid
2
2
 
3
3
  import pandas as pd
4
4
  from decimal import Decimal
5
+ from typing import Mapping, Any
5
6
 
6
7
 
7
8
  def sql_type_to_pandas_type(sql_type: str):
@@ -60,9 +61,14 @@ def ensure_dataframe_has_columns(df: pd.DataFrame):
60
61
  )
61
62
 
62
63
 
63
- def dataframe_from_response(buffer: list[list[dict]], schema: dict):
64
+ def dataframe_from_response(
65
+ buffer: list[list[Mapping[str, Any]]], fields: list[Mapping[str, Any]]
66
+ ):
64
67
  """
65
68
  Converts the response from Feldera to a pandas DataFrame.
69
+
70
+ :param buffer: A buffer of a list of JSON formatted output of the view you are listening to.
71
+ :param fields: The schema (list of fields) of the view you are listening to.
66
72
  """
67
73
 
68
74
  pd_schema = {}
@@ -70,7 +76,7 @@ def dataframe_from_response(buffer: list[list[dict]], schema: dict):
70
76
  decimal_col = []
71
77
  uuid_col = []
72
78
 
73
- for column in schema["fields"]:
79
+ for column in fields:
74
80
  column_name = column["name"]
75
81
  if not column["case_sensitive"]:
76
82
  column_name = column_name.lower()
feldera/enums.py CHANGED
@@ -34,137 +34,164 @@ class BuildMode(Enum):
34
34
  GET_OR_CREATE = 3
35
35
 
36
36
 
37
- class PipelineStatus(Enum):
37
+ class DeploymentDesiredStatus(Enum):
38
+ """
39
+ Deployment desired status of the pipeline.
38
40
  """
39
- Represents the state that this pipeline is currently in.
40
41
 
41
- .. code-block:: text
42
+ STOPPED = 0
43
+ UNAVAILABLE = 1
44
+ STANDBY = 2
45
+ PAUSED = 3
46
+ RUNNING = 4
47
+ SUSPENDED = 5
42
48
 
43
- Stopped ◄─────────── Stopping ◄───── All states can transition
44
- │ ▲ to Stopping by either:
45
- /start or /pause │ │ (1) user calling /stop?force=true, or;
46
- ▼ │ (2) pipeline encountering a fatal
47
- ⌛Provisioning Suspending resource or runtime error,
48
- │ ▲ having the system call /stop?force=true
49
- ▼ │ /stop effectively
50
- ⌛Initializing ─────────────┤ ?force=false
51
- │ │
52
- ┌─────────┼────────────────────┴─────┐
53
- │ ▼ │
54
- │ Paused ◄──────► Unavailable │
55
- │ │ ▲ ▲ │
56
- │ /start │ │ /pause │ │
57
- │ ▼ │ │ │
58
- │ Running ◄─────────────┘ │
59
- └────────────────────────────────────┘
49
+ @staticmethod
50
+ def from_str(value):
51
+ for member in DeploymentDesiredStatus:
52
+ if member.name.lower() == value.lower():
53
+ return member
54
+ raise ValueError(
55
+ f"Unknown value '{value}' for enum {DeploymentDesiredStatus.__name__}"
56
+ )
60
57
 
61
- """
62
58
 
63
- NOT_FOUND = 0
59
+ class DeploymentResourcesDesiredStatus(Enum):
64
60
  """
65
- The pipeline has not been created yet.
61
+ The desired status of deployment resources of the pipeline.
66
62
  """
67
63
 
68
- STOPPED = 1
69
- """
70
- The pipeline has not (yet) been started or has been stopped either
71
- manually by the user or automatically by the system due to a
72
- resource or runtime error.
64
+ STOPPED = 0
65
+ PROVISIONED = 1
73
66
 
74
- The pipeline remains in this state until:
67
+ @staticmethod
68
+ def from_str(value):
69
+ for member in DeploymentResourcesDesiredStatus:
70
+ if member.name.lower() == value.lower():
71
+ return member
72
+ raise ValueError(
73
+ f"Unknown value '{value}' for enum {DeploymentResourcesDesiredStatus.__name__}"
74
+ )
75
75
 
76
- 1. The user starts it via `/start` or `/pause`, transitioning to `PROVISIONING`.
77
- 2. Early start fails (e.g., compilation failure), transitioning to `STOPPING`.
78
- """
79
76
 
80
- PROVISIONING = 2
77
+ class DeploymentResourcesStatus(Enum):
81
78
  """
82
- Compute (and optionally storage) resources needed for running the pipeline
83
- are being provisioned.
84
-
85
- The pipeline remains in this state until:
86
-
87
- 1. Resources are provisioned successfully, transitioning to `INITIALIZING`.
88
- 2. Provisioning fails or times out, transitioning to `STOPPING`.
89
- 3. The user cancels the pipeline via `/stop`, transitioning to `STOPPING`.
79
+ The desired status of deployment resources of the pipeline.
90
80
  """
91
81
 
92
- INITIALIZING = 3
93
- """
94
- The pipeline is initializing its internal state and connectors.
82
+ STOPPED = 0
83
+ PROVISIONING = 1
84
+ PROVISIONED = 2
85
+ STOPPING = 3
95
86
 
96
- The pipeline remains in this state until:
87
+ @staticmethod
88
+ def from_str(value):
89
+ for member in DeploymentResourcesStatus:
90
+ if member.name.lower() == value.lower():
91
+ return member
92
+ raise ValueError(
93
+ f"Unknown value '{value}' for enum {DeploymentResourcesStatus.__name__}"
94
+ )
97
95
 
98
- 1. Initialization succeeds, transitioning to `PAUSED`.
99
- 2. Initialization fails or times out, transitioning to `STOPPING`.
100
- 3. The user suspends the pipeline via `/suspend`, transitioning to `SUSPENDING`.
101
- 4. The user stops the pipeline via `/stop`, transitioning to `STOPPING`.
102
- """
103
96
 
104
- PAUSED = 4
97
+ class DeploymentRuntimeDesiredStatus(Enum):
105
98
  """
106
- The pipeline is initialized but data processing is paused.
107
-
108
- The pipeline remains in this state until:
109
-
110
- 1. The user starts it via `/start`, transitioning to `RUNNING`.
111
- 2. A runtime error occurs, transitioning to `STOPPING`.
112
- 3. The user suspends it via `/suspend`, transitioning to `SUSPENDING`.
113
- 4. The user stops it via `/stop`, transitioning to `STOPPING`.
99
+ Deployment runtime desired status of the pipeline.
114
100
  """
115
101
 
116
- RUNNING = 5
117
- """
118
- The pipeline is processing data.
102
+ UNAVAILABLE = 0
103
+ STANDBY = 1
104
+ PAUSED = 2
105
+ RUNNING = 3
106
+ SUSPENDED = 4
119
107
 
120
- The pipeline remains in this state until:
108
+ @staticmethod
109
+ def from_str(value):
110
+ for member in DeploymentRuntimeDesiredStatus:
111
+ if member.name.lower() == value.lower():
112
+ return member
113
+ raise ValueError(
114
+ f"Unknown value '{value}' for enum {DeploymentRuntimeDesiredStatus.__name__}"
115
+ )
121
116
 
122
- 1. The user pauses it via `/pause`, transitioning to `PAUSED`.
123
- 2. A runtime error occurs, transitioning to `STOPPING`.
124
- 3. The user suspends it via `/suspend`, transitioning to `SUSPENDING`.
125
- 4. The user stops it via `/stop`, transitioning to `STOPPING`.
126
- """
127
117
 
128
- UNAVAILABLE = 6
118
+ class DeploymentRuntimeStatus(Enum):
129
119
  """
130
- The pipeline was initialized at least once but is currently unreachable
131
- or not ready.
120
+ Deployment runtime status of the pipeline.
121
+ """
122
+
123
+ UNAVAILABLE = 0
124
+ STANDBY = 1
125
+ AWAITINGAPPROVAL = 2
126
+ INITIALIZING = 3
127
+ BOOTSTRAPPING = 4
128
+ REPLAYING = 5
129
+ PAUSED = 6
130
+ RUNNING = 7
131
+ SUSPENDED = 8
132
132
 
133
- The pipeline remains in this state until:
133
+ @staticmethod
134
+ def from_str(value):
135
+ for member in DeploymentRuntimeStatus:
136
+ if member.name.lower() == value.lower():
137
+ return member
138
+ raise ValueError(
139
+ f"Unknown value '{value}' for enum {DeploymentRuntimeStatus.__name__}"
140
+ )
134
141
 
135
- 1. A successful status check transitions it back to `PAUSED` or `RUNNING`.
136
- 2. A runtime error occurs, transitioning to `STOPPING`.
137
- 3. The user suspends it via `/suspend`, transitioning to `SUSPENDING`.
138
- 4. The user stops it via `/stop`, transitioning to `STOPPING`.
139
142
 
140
- Note: While in this state, `/start` or `/pause` express desired state but
141
- are only applied once the pipeline becomes reachable.
143
+ class PipelineStatus(Enum):
142
144
  """
143
-
144
- SUSPENDING = 7
145
+ Represents the state that this pipeline is currently in.
145
146
  """
146
- The pipeline is being suspended to storage.
147
147
 
148
- The pipeline remains in this state until:
148
+ NOT_FOUND = 0
149
+ STOPPED = 1
150
+ PROVISIONING = 2
151
+ UNAVAILABLE = 3
152
+ STANDBY = 4
153
+ AWAITINGAPPROVAL = 5
154
+ INITIALIZING = 6
155
+ BOOTSTRAPPING = 7
156
+ REPLAYING = 8
157
+ PAUSED = 9
158
+ RUNNING = 10
159
+ SUSPENDED = 11
160
+ STOPPING = 12
149
161
 
150
- 1. Suspension succeeds, transitioning to `STOPPING`.
151
- 2. A runtime error occurs, transitioning to `STOPPING`.
152
- """
162
+ @staticmethod
163
+ def from_str(value):
164
+ for member in PipelineStatus:
165
+ if member.name.lower() == value.lower():
166
+ return member
167
+ raise ValueError(f"Unknown value '{value}' for enum {PipelineStatus.__name__}")
168
+
169
+ def __eq__(self, other):
170
+ return self.value == other.value
153
171
 
154
- STOPPING = 8
155
- """
156
- The pipeline's compute resources are being scaled down to zero.
157
172
 
158
- The pipeline remains in this state until deallocation completes,
159
- transitioning to `STOPPED`.
173
+ class TransactionStatus(Enum):
174
+ """
175
+ Represents the transaction handling status of a pipeline.
160
176
  """
161
177
 
178
+ NoTransaction = 1
179
+ """There is currently no active transaction."""
180
+
181
+ TransactionInProgress = 2
182
+ """There is an active transaction in progress."""
183
+
184
+ CommitInProgress = 3
185
+ """A commit is currently in progress."""
186
+
162
187
  @staticmethod
163
188
  def from_str(value):
164
- for member in PipelineStatus:
189
+ for member in TransactionStatus:
165
190
  if member.name.lower() == value.lower():
166
191
  return member
167
- raise ValueError(f"Unknown value '{value}' for enum {PipelineStatus.__name__}")
192
+ raise ValueError(
193
+ f"Unknown value '{value}' for enum {TransactionStatus.__name__}"
194
+ )
168
195
 
169
196
  def __eq__(self, other):
170
197
  return self.value == other.value
@@ -311,3 +338,29 @@ class FaultToleranceModel(Enum):
311
338
  raise ValueError(
312
339
  f"Unknown value '{value}' for enum {FaultToleranceModel.__name__}"
313
340
  )
341
+
342
+
343
+ class PipelineFieldSelector(Enum):
344
+ ALL = "all"
345
+ """Select all fields of a pipeline."""
346
+
347
+ STATUS = "status"
348
+ """Select only the fields required to know the status of a pipeline."""
349
+
350
+
351
+ class BootstrapPolicy(Enum):
352
+ AWAIT_APPROVAL = "await_approval"
353
+ ALLOW = "allow"
354
+ REJECT = "reject"
355
+
356
+
357
+ class CompletionTokenStatus(Enum):
358
+ COMPLETE = "complete"
359
+ """
360
+ Feldera has completed processing all inputs represented by this token.
361
+ """
362
+
363
+ IN_PROGRESS = "inprogress"
364
+ """
365
+ Feldera is still processing the inputs represented by this token.
366
+ """
feldera/output_handler.py CHANGED
@@ -1,7 +1,8 @@
1
1
  import pandas as pd
2
+
2
3
  from typing import Optional
4
+ from threading import Event
3
5
 
4
- from queue import Queue
5
6
  from feldera import FelderaClient
6
7
  from feldera._callback_runner import CallbackRunner
7
8
 
@@ -12,7 +13,6 @@ class OutputHandler:
12
13
  client: FelderaClient,
13
14
  pipeline_name: str,
14
15
  view_name: str,
15
- queue: Optional[Queue],
16
16
  ):
17
17
  """
18
18
  Initializes the output handler, but doesn't start it.
@@ -22,17 +22,26 @@ class OutputHandler:
22
22
  self.client: FelderaClient = client
23
23
  self.pipeline_name: str = pipeline_name
24
24
  self.view_name: str = view_name
25
- self.queue: Optional[Queue] = queue
26
25
  self.buffer: list[pd.DataFrame] = []
26
+ self.exception: Optional[BaseException] = None
27
+ self.event = Event()
27
28
 
28
29
  # the callback that is passed to the `CallbackRunner`
29
30
  def callback(df: pd.DataFrame, _: int):
30
31
  if not df.empty:
31
32
  self.buffer.append(df)
32
33
 
34
+ def exception_callback(exception: BaseException):
35
+ self.exception = exception
36
+
33
37
  # sets up the callback runner
34
38
  self.handler = CallbackRunner(
35
- self.client, self.pipeline_name, self.view_name, callback, queue
39
+ self.client,
40
+ self.pipeline_name,
41
+ self.view_name,
42
+ callback,
43
+ exception_callback,
44
+ self.event,
36
45
  )
37
46
 
38
47
  def start(self):
@@ -41,6 +50,7 @@ class OutputHandler:
41
50
  """
42
51
 
43
52
  self.handler.start()
53
+ _ = self.event.wait()
44
54
 
45
55
  def to_pandas(self, clear_buffer: bool = True):
46
56
  """
@@ -49,6 +59,8 @@ class OutputHandler:
49
59
  :param clear_buffer: Whether to clear the buffer after getting the output.
50
60
  """
51
61
 
62
+ if self.exception is not None:
63
+ raise self.exception
52
64
  if len(self.buffer) == 0:
53
65
  return pd.DataFrame()
54
66
  res = pd.concat(self.buffer, ignore_index=True)