feldera 0.69.0__py3-none-any.whl → 0.189.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of feldera might be problematic. Click here for more details.
- feldera/__init__.py +3 -0
- feldera/_callback_runner.py +64 -85
- feldera/_helpers.py +8 -2
- feldera/enums.py +222 -116
- feldera/output_handler.py +16 -4
- feldera/pipeline.py +718 -261
- feldera/pipeline_builder.py +42 -19
- feldera/rest/_helpers.py +40 -0
- feldera/rest/_httprequests.py +365 -192
- feldera/rest/config.py +44 -30
- feldera/rest/errors.py +16 -0
- feldera/rest/feldera_client.py +694 -153
- feldera/rest/pipeline.py +16 -1
- feldera/runtime_config.py +32 -5
- feldera/stats.py +152 -0
- feldera/tests/test_datafusionize.py +38 -0
- feldera/testutils.py +382 -0
- feldera/testutils_oidc.py +368 -0
- feldera-0.189.0.dist-info/METADATA +163 -0
- feldera-0.189.0.dist-info/RECORD +26 -0
- feldera-0.69.0.dist-info/METADATA +0 -105
- feldera-0.69.0.dist-info/RECORD +0 -21
- {feldera-0.69.0.dist-info → feldera-0.189.0.dist-info}/WHEEL +0 -0
- {feldera-0.69.0.dist-info → feldera-0.189.0.dist-info}/top_level.txt +0 -0
feldera/__init__.py
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
from feldera.rest.feldera_client import FelderaClient as FelderaClient
|
|
2
2
|
from feldera.pipeline import Pipeline as Pipeline
|
|
3
3
|
from feldera.pipeline_builder import PipelineBuilder as PipelineBuilder
|
|
4
|
+
from feldera.rest._helpers import determine_client_version
|
|
5
|
+
|
|
6
|
+
__version__ = determine_client_version()
|
|
4
7
|
|
|
5
8
|
import pretty_errors
|
|
6
9
|
|
feldera/_callback_runner.py
CHANGED
|
@@ -1,16 +1,13 @@
|
|
|
1
|
-
from
|
|
2
|
-
from
|
|
3
|
-
from typing import Callable, Optional
|
|
4
|
-
from queue import Queue, Empty
|
|
1
|
+
from threading import Thread, Event
|
|
2
|
+
from typing import Callable, List, Optional, Mapping, Any
|
|
5
3
|
|
|
6
4
|
import pandas as pd
|
|
7
5
|
from feldera import FelderaClient
|
|
8
6
|
from feldera._helpers import dataframe_from_response
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
RanToCompletion = 2
|
|
7
|
+
from feldera.enums import PipelineFieldSelector
|
|
8
|
+
from feldera.rest.sql_table import SQLTable
|
|
9
|
+
from feldera.rest.sql_view import SQLView
|
|
10
|
+
from feldera.rest.pipeline import Pipeline
|
|
14
11
|
|
|
15
12
|
|
|
16
13
|
class CallbackRunner(Thread):
|
|
@@ -20,16 +17,51 @@ class CallbackRunner(Thread):
|
|
|
20
17
|
pipeline_name: str,
|
|
21
18
|
view_name: str,
|
|
22
19
|
callback: Callable[[pd.DataFrame, int], None],
|
|
23
|
-
|
|
20
|
+
exception_callback: Callable[[BaseException], None],
|
|
21
|
+
event: Event,
|
|
24
22
|
):
|
|
23
|
+
"""
|
|
24
|
+
:param client: The :class:`.FelderaClient` to use.
|
|
25
|
+
:param pipeline_name: The name of the current pipeline.
|
|
26
|
+
:param view_name: The name of the view we are listening to.
|
|
27
|
+
:param callback: The callback function to call on the data we receive.
|
|
28
|
+
:param exception_callback: The callback function to call when an exception occurs.
|
|
29
|
+
:param event: The event the callback runner sets once it has started listening to the view, unblocking the thread that waits on it.
|
|
30
|
+
"""
|
|
31
|
+
|
|
25
32
|
super().__init__()
|
|
26
33
|
self.daemon = True
|
|
27
34
|
self.client: FelderaClient = client
|
|
28
35
|
self.pipeline_name: str = pipeline_name
|
|
29
36
|
self.view_name: str = view_name
|
|
30
37
|
self.callback: Callable[[pd.DataFrame, int], None] = callback
|
|
31
|
-
self.
|
|
32
|
-
self.
|
|
38
|
+
self.exception_callback: Callable[[BaseException], None] = exception_callback
|
|
39
|
+
self.event: Event = event
|
|
40
|
+
|
|
41
|
+
self.pipeline: Pipeline = self.client.get_pipeline(
|
|
42
|
+
self.pipeline_name, PipelineFieldSelector.ALL
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
view_schema = None
|
|
46
|
+
|
|
47
|
+
schemas: List[SQLTable | SQLView] = self.pipeline.tables + self.pipeline.views
|
|
48
|
+
for schema in schemas:
|
|
49
|
+
if schema.name == self.view_name:
|
|
50
|
+
view_schema = schema
|
|
51
|
+
break
|
|
52
|
+
|
|
53
|
+
if view_schema is None:
|
|
54
|
+
raise ValueError(
|
|
55
|
+
f"Table or View {self.view_name} not found in the pipeline schema."
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
self.schema: SQLTable | SQLView = view_schema
|
|
59
|
+
|
|
60
|
+
def to_callback(self, chunk: Mapping[str, Any]):
|
|
61
|
+
data: Optional[list[Mapping[str, Any]]] = chunk.get("json_data")
|
|
62
|
+
seq_no: Optional[int] = chunk.get("sequence_number")
|
|
63
|
+
if data is not None and seq_no is not None:
|
|
64
|
+
self.callback(dataframe_from_response([data], self.schema.fields), seq_no)
|
|
33
65
|
|
|
34
66
|
def run(self):
|
|
35
67
|
"""
|
|
@@ -38,79 +70,26 @@ class CallbackRunner(Thread):
|
|
|
38
70
|
:meta private:
|
|
39
71
|
"""
|
|
40
72
|
|
|
41
|
-
|
|
42
|
-
|
|
73
|
+
try:
|
|
74
|
+
gen_obj = self.client.listen_to_pipeline(
|
|
75
|
+
self.pipeline_name,
|
|
76
|
+
self.view_name,
|
|
77
|
+
format="json",
|
|
78
|
+
case_sensitive=self.schema.case_sensitive,
|
|
79
|
+
)
|
|
43
80
|
|
|
44
|
-
|
|
45
|
-
schemas = [relation for relation in schema["inputs"] + schema["outputs"]]
|
|
46
|
-
for schema in schemas:
|
|
47
|
-
if schema["name"] == self.view_name:
|
|
48
|
-
self.schema = schema
|
|
49
|
-
break
|
|
81
|
+
iterator = gen_obj()
|
|
50
82
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
83
|
+
# Trigger the HTTP call
|
|
84
|
+
chunk = next(iterator)
|
|
85
|
+
|
|
86
|
+
# Unblock the main thread
|
|
87
|
+
self.event.set()
|
|
88
|
+
|
|
89
|
+
self.to_callback(chunk)
|
|
90
|
+
|
|
91
|
+
for chunk in iterator:
|
|
92
|
+
self.to_callback(chunk)
|
|
55
93
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
# if there is Queue, we wait for the instruction to start the pipeline
|
|
60
|
-
# this means that we are listening to the pipeline before running it, therefore, all data should be received
|
|
61
|
-
if self.queue:
|
|
62
|
-
ack: _CallbackRunnerInstruction = self.queue.get()
|
|
63
|
-
|
|
64
|
-
match ack:
|
|
65
|
-
# if the pipeline has actually been started, we start a listener
|
|
66
|
-
case _CallbackRunnerInstruction.PipelineStarted:
|
|
67
|
-
# listen to the pipeline
|
|
68
|
-
gen_obj = self.client.listen_to_pipeline(
|
|
69
|
-
self.pipeline_name, self.view_name, format="json"
|
|
70
|
-
)
|
|
71
|
-
|
|
72
|
-
# if there is a queue set up, inform the main thread that the listener has been started, and it can
|
|
73
|
-
# proceed with starting the pipeline
|
|
74
|
-
if self.queue:
|
|
75
|
-
# stop blocking the main thread on `join` for the previous message
|
|
76
|
-
self.queue.task_done()
|
|
77
|
-
|
|
78
|
-
for chunk in gen_obj:
|
|
79
|
-
chunk: dict = chunk
|
|
80
|
-
data: list[dict] = chunk.get("json_data")
|
|
81
|
-
seq_no: int = chunk.get("sequence_number")
|
|
82
|
-
|
|
83
|
-
if data is not None:
|
|
84
|
-
self.callback(dataframe_from_response([data], schema), seq_no)
|
|
85
|
-
|
|
86
|
-
if self.queue:
|
|
87
|
-
try:
|
|
88
|
-
# in a non-blocking way, check if the queue has received further instructions
|
|
89
|
-
# this should be a RanToCompletion instruction, which means that the pipeline has been
|
|
90
|
-
# completed
|
|
91
|
-
again_ack = self.queue.get_nowait()
|
|
92
|
-
|
|
93
|
-
# if the queue has received a message
|
|
94
|
-
if again_ack:
|
|
95
|
-
match again_ack:
|
|
96
|
-
case _CallbackRunnerInstruction.RanToCompletion:
|
|
97
|
-
# stop blocking the main thread on `join` and return from this thread
|
|
98
|
-
self.queue.task_done()
|
|
99
|
-
|
|
100
|
-
return
|
|
101
|
-
|
|
102
|
-
case _CallbackRunnerInstruction.PipelineStarted:
|
|
103
|
-
# if the pipeline has been started again, which shouldn't happen,
|
|
104
|
-
# ignore it and continue listening, call `task_done` to avoid blocking the main
|
|
105
|
-
# thread on `join`
|
|
106
|
-
self.queue.task_done()
|
|
107
|
-
|
|
108
|
-
continue
|
|
109
|
-
except Empty:
|
|
110
|
-
# if the queue is empty, continue listening
|
|
111
|
-
continue
|
|
112
|
-
|
|
113
|
-
case _CallbackRunnerInstruction.RanToCompletion:
|
|
114
|
-
if self.queue:
|
|
115
|
-
self.queue.task_done()
|
|
116
|
-
return
|
|
94
|
+
except BaseException as e:
|
|
95
|
+
self.exception_callback(e)
|
feldera/_helpers.py
CHANGED
|
@@ -2,6 +2,7 @@ import uuid
|
|
|
2
2
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
from decimal import Decimal
|
|
5
|
+
from typing import Mapping, Any
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
def sql_type_to_pandas_type(sql_type: str):
|
|
@@ -60,9 +61,14 @@ def ensure_dataframe_has_columns(df: pd.DataFrame):
|
|
|
60
61
|
)
|
|
61
62
|
|
|
62
63
|
|
|
63
|
-
def dataframe_from_response(
|
|
64
|
+
def dataframe_from_response(
|
|
65
|
+
buffer: list[list[Mapping[str, Any]]], fields: list[Mapping[str, Any]]
|
|
66
|
+
):
|
|
64
67
|
"""
|
|
65
68
|
Converts the response from Feldera to a pandas DataFrame.
|
|
69
|
+
|
|
70
|
+
:param buffer: A list of chunks, each chunk being a list of JSON-formatted output records of the view you are listening to.
|
|
71
|
+
:param fields: The schema (list of fields) of the view you are listening to.
|
|
66
72
|
"""
|
|
67
73
|
|
|
68
74
|
pd_schema = {}
|
|
@@ -70,7 +76,7 @@ def dataframe_from_response(buffer: list[list[dict]], schema: dict):
|
|
|
70
76
|
decimal_col = []
|
|
71
77
|
uuid_col = []
|
|
72
78
|
|
|
73
|
-
for column in
|
|
79
|
+
for column in fields:
|
|
74
80
|
column_name = column["name"]
|
|
75
81
|
if not column["case_sensitive"]:
|
|
76
82
|
column_name = column_name.lower()
|
feldera/enums.py
CHANGED
|
@@ -34,165 +34,164 @@ class BuildMode(Enum):
|
|
|
34
34
|
GET_OR_CREATE = 3
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
class
|
|
37
|
+
class DeploymentDesiredStatus(Enum):
|
|
38
38
|
"""
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
.. code-block:: text
|
|
42
|
-
|
|
43
|
-
Shutdown ◄────┐
|
|
44
|
-
│ │
|
|
45
|
-
/deploy │ │
|
|
46
|
-
│ ⌛ShuttingDown
|
|
47
|
-
▼ ▲
|
|
48
|
-
⌛Provisioning │
|
|
49
|
-
│ │
|
|
50
|
-
Provisioned │
|
|
51
|
-
▼ │/shutdown
|
|
52
|
-
⌛Initializing │
|
|
53
|
-
│ │
|
|
54
|
-
┌────────┴─────────┴─┐
|
|
55
|
-
│ ▼ │
|
|
56
|
-
│ Paused │
|
|
57
|
-
│ │ ▲ │
|
|
58
|
-
│/start│ │/pause │
|
|
59
|
-
│ ▼ │ │
|
|
60
|
-
│ Running │
|
|
61
|
-
└──────────┬─────────┘
|
|
62
|
-
│
|
|
63
|
-
▼
|
|
64
|
-
Failed
|
|
39
|
+
Deployment desired status of the pipeline.
|
|
65
40
|
"""
|
|
66
41
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
42
|
+
STOPPED = 0
|
|
43
|
+
UNAVAILABLE = 1
|
|
44
|
+
STANDBY = 2
|
|
45
|
+
PAUSED = 3
|
|
46
|
+
RUNNING = 4
|
|
47
|
+
SUSPENDED = 5
|
|
71
48
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
49
|
+
@staticmethod
|
|
50
|
+
def from_str(value):
|
|
51
|
+
for member in DeploymentDesiredStatus:
|
|
52
|
+
if member.name.lower() == value.lower():
|
|
53
|
+
return member
|
|
54
|
+
raise ValueError(
|
|
55
|
+
f"Unknown value '{value}' for enum {DeploymentDesiredStatus.__name__}"
|
|
56
|
+
)
|
|
75
57
|
|
|
76
|
-
The pipeline remains in this state until the user triggers
|
|
77
|
-
a deployment by invoking the `/deploy` endpoint.
|
|
78
|
-
"""
|
|
79
58
|
|
|
80
|
-
|
|
59
|
+
class DeploymentResourcesDesiredStatus(Enum):
|
|
60
|
+
"""
|
|
61
|
+
The desired status of deployment resources of the pipeline.
|
|
81
62
|
"""
|
|
82
|
-
The runner triggered a deployment of the pipeline and is
|
|
83
|
-
waiting for the pipeline HTTP server to come up.
|
|
84
63
|
|
|
85
|
-
|
|
86
|
-
|
|
64
|
+
STOPPED = 0
|
|
65
|
+
PROVISIONED = 1
|
|
87
66
|
|
|
88
|
-
|
|
89
|
-
|
|
67
|
+
@staticmethod
|
|
68
|
+
def from_str(value):
|
|
69
|
+
for member in DeploymentResourcesDesiredStatus:
|
|
70
|
+
if member.name.lower() == value.lower():
|
|
71
|
+
return member
|
|
72
|
+
raise ValueError(
|
|
73
|
+
f"Unknown value '{value}' for enum {DeploymentResourcesDesiredStatus.__name__}"
|
|
74
|
+
)
|
|
90
75
|
|
|
91
|
-
1. Its HTTP server is up and running; the pipeline transitions to the
|
|
92
|
-
`PipelineStatus.INITIALIZING` state.
|
|
93
|
-
2. A pre-defined timeout has passed. The runner performs forced
|
|
94
|
-
shutdown of the pipeline; returns to the `PipelineStatus.SHUTDOWN` state.
|
|
95
|
-
3. The user cancels the pipeline by invoking the `/shutdown` endpoint.
|
|
96
|
-
The manager performs forced shutdown of the pipeline, returns to the
|
|
97
|
-
`PipelineStatus.SHUTDOWN` state.
|
|
98
76
|
|
|
77
|
+
class DeploymentResourcesStatus(Enum):
|
|
99
78
|
"""
|
|
100
|
-
|
|
101
|
-
INITIALIZING = 4
|
|
79
|
+
The current status of deployment resources of the pipeline.
|
|
102
80
|
"""
|
|
103
|
-
The pipeline is initializing its internal state and connectors.
|
|
104
81
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
82
|
+
STOPPED = 0
|
|
83
|
+
PROVISIONING = 1
|
|
84
|
+
PROVISIONED = 2
|
|
85
|
+
STOPPING = 3
|
|
108
86
|
|
|
109
|
-
|
|
87
|
+
@staticmethod
|
|
88
|
+
def from_str(value):
|
|
89
|
+
for member in DeploymentResourcesStatus:
|
|
90
|
+
if member.name.lower() == value.lower():
|
|
91
|
+
return member
|
|
92
|
+
raise ValueError(
|
|
93
|
+
f"Unknown value '{value}' for enum {DeploymentResourcesStatus.__name__}"
|
|
94
|
+
)
|
|
110
95
|
|
|
111
|
-
1. Initialization completes successfully; the pipeline transitions to the
|
|
112
|
-
`PipelineStatus.PAUSED` state.
|
|
113
|
-
2. Initialization fails; transitions to the `PipelineStatus.FAILED` state.
|
|
114
|
-
3. A pre-defined timeout has passed. The runner performs forced
|
|
115
|
-
shutdown of the pipeline; returns to the `PipelineStatus.SHUTDOWN` state.
|
|
116
|
-
4. The user cancels the pipeline by invoking the `/shutdown` endpoint.
|
|
117
|
-
The manager performs forced shutdown of the pipeline; returns to the
|
|
118
|
-
`PipelineStatus.SHUTDOWN` state.
|
|
119
96
|
|
|
97
|
+
class DeploymentRuntimeDesiredStatus(Enum):
|
|
120
98
|
"""
|
|
121
|
-
|
|
122
|
-
PAUSED = 5
|
|
99
|
+
Deployment runtime desired status of the pipeline.
|
|
123
100
|
"""
|
|
124
|
-
The pipeline is fully initialized, but data processing has been paused.
|
|
125
101
|
|
|
126
|
-
|
|
102
|
+
UNAVAILABLE = 0
|
|
103
|
+
STANDBY = 1
|
|
104
|
+
PAUSED = 2
|
|
105
|
+
RUNNING = 3
|
|
106
|
+
SUSPENDED = 4
|
|
107
|
+
|
|
108
|
+
@staticmethod
|
|
109
|
+
def from_str(value):
|
|
110
|
+
for member in DeploymentRuntimeDesiredStatus:
|
|
111
|
+
if member.name.lower() == value.lower():
|
|
112
|
+
return member
|
|
113
|
+
raise ValueError(
|
|
114
|
+
f"Unknown value '{value}' for enum {DeploymentRuntimeDesiredStatus.__name__}"
|
|
115
|
+
)
|
|
127
116
|
|
|
128
|
-
1. The user starts the pipeline by invoking the `/start` endpoint. The
|
|
129
|
-
manager passes the request to the pipeline; transitions to the
|
|
130
|
-
`PipelineStatus.RUNNING` state.
|
|
131
|
-
2. The user cancels the pipeline by invoking the `/shutdown` endpoint.
|
|
132
|
-
The manager passes the shutdown request to the pipeline to perform a
|
|
133
|
-
graceful shutdown; transitions to the `PipelineStatus.SHUTTING_DOWN` state.
|
|
134
|
-
3. An unexpected runtime error renders the pipeline `PipelineStatus.FAILED`.
|
|
135
117
|
|
|
118
|
+
class DeploymentRuntimeStatus(Enum):
|
|
136
119
|
"""
|
|
137
|
-
|
|
138
|
-
RUNNING = 6
|
|
120
|
+
Deployment runtime status of the pipeline.
|
|
139
121
|
"""
|
|
140
|
-
The pipeline is processing data.
|
|
141
122
|
|
|
142
|
-
|
|
123
|
+
UNAVAILABLE = 0
|
|
124
|
+
STANDBY = 1
|
|
125
|
+
AWAITINGAPPROVAL = 2
|
|
126
|
+
INITIALIZING = 3
|
|
127
|
+
BOOTSTRAPPING = 4
|
|
128
|
+
REPLAYING = 5
|
|
129
|
+
PAUSED = 6
|
|
130
|
+
RUNNING = 7
|
|
131
|
+
SUSPENDED = 8
|
|
143
132
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
`PipelineStatus.FAILED`.
|
|
133
|
+
@staticmethod
|
|
134
|
+
def from_str(value):
|
|
135
|
+
for member in DeploymentRuntimeStatus:
|
|
136
|
+
if member.name.lower() == value.lower():
|
|
137
|
+
return member
|
|
138
|
+
raise ValueError(
|
|
139
|
+
f"Unknown value '{value}' for enum {DeploymentRuntimeStatus.__name__}"
|
|
140
|
+
)
|
|
153
141
|
|
|
154
|
-
"""
|
|
155
142
|
|
|
156
|
-
|
|
143
|
+
class PipelineStatus(Enum):
|
|
144
|
+
"""
|
|
145
|
+
Represents the state that this pipeline is currently in.
|
|
157
146
|
"""
|
|
158
|
-
Graceful shutdown in progress.
|
|
159
147
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
148
|
+
NOT_FOUND = 0
|
|
149
|
+
STOPPED = 1
|
|
150
|
+
PROVISIONING = 2
|
|
151
|
+
UNAVAILABLE = 3
|
|
152
|
+
STANDBY = 4
|
|
153
|
+
AWAITINGAPPROVAL = 5
|
|
154
|
+
INITIALIZING = 6
|
|
155
|
+
BOOTSTRAPPING = 7
|
|
156
|
+
REPLAYING = 8
|
|
157
|
+
PAUSED = 9
|
|
158
|
+
RUNNING = 10
|
|
159
|
+
SUSPENDED = 11
|
|
160
|
+
STOPPING = 12
|
|
163
161
|
|
|
164
|
-
|
|
162
|
+
@staticmethod
|
|
163
|
+
def from_str(value):
|
|
164
|
+
for member in PipelineStatus:
|
|
165
|
+
if member.name.lower() == value.lower():
|
|
166
|
+
return member
|
|
167
|
+
raise ValueError(f"Unknown value '{value}' for enum {PipelineStatus.__name__}")
|
|
165
168
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
`PipelineStatus.SHUTDOWN` state.
|
|
169
|
+
def __eq__(self, other):
|
|
170
|
+
return self.value == other.value
|
|
169
171
|
|
|
170
|
-
"""
|
|
171
172
|
|
|
172
|
-
|
|
173
|
+
class TransactionStatus(Enum):
|
|
173
174
|
"""
|
|
174
|
-
|
|
175
|
-
by issuing a call to shutdown the pipeline; transitions to the
|
|
176
|
-
`PipelineStatus.SHUTDOWN` state.
|
|
175
|
+
Represents the transaction handling status of a pipeline.
|
|
177
176
|
"""
|
|
178
177
|
|
|
179
|
-
|
|
180
|
-
"""
|
|
181
|
-
The pipeline was at least once initialized, but in the most recent status check either
|
|
182
|
-
could not be reached or returned it is not yet ready.
|
|
183
|
-
"""
|
|
178
|
+
NoTransaction = 1
|
|
179
|
+
"""There is currently no active transaction."""
|
|
184
180
|
|
|
185
|
-
|
|
186
|
-
"""
|
|
187
|
-
|
|
188
|
-
|
|
181
|
+
TransactionInProgress = 2
|
|
182
|
+
"""There is an active transaction in progress."""
|
|
183
|
+
|
|
184
|
+
CommitInProgress = 3
|
|
185
|
+
"""A commit is currently in progress."""
|
|
189
186
|
|
|
190
187
|
@staticmethod
|
|
191
188
|
def from_str(value):
|
|
192
|
-
for member in
|
|
189
|
+
for member in TransactionStatus:
|
|
193
190
|
if member.name.lower() == value.lower():
|
|
194
191
|
return member
|
|
195
|
-
raise ValueError(
|
|
192
|
+
raise ValueError(
|
|
193
|
+
f"Unknown value '{value}' for enum {TransactionStatus.__name__}"
|
|
194
|
+
)
|
|
196
195
|
|
|
197
196
|
def __eq__(self, other):
|
|
198
197
|
return self.value == other.value
|
|
@@ -258,3 +257,110 @@ class CheckpointStatus(Enum):
|
|
|
258
257
|
"""
|
|
259
258
|
|
|
260
259
|
return self.error
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
class StorageStatus(Enum):
|
|
263
|
+
"""
|
|
264
|
+
Represents the current storage usage status of the pipeline.
|
|
265
|
+
"""
|
|
266
|
+
|
|
267
|
+
CLEARED = 0
|
|
268
|
+
"""
|
|
269
|
+
The pipeline has not been started before, or the user has cleared storage.
|
|
270
|
+
|
|
271
|
+
In this state, the pipeline has no storage resources bound to it.
|
|
272
|
+
"""
|
|
273
|
+
|
|
274
|
+
INUSE = 1
|
|
275
|
+
"""
|
|
276
|
+
The pipeline was (attempted to be) started before, transitioning from `STOPPED`
|
|
277
|
+
to `PROVISIONING`, which caused the storage status to become `INUSE`.
|
|
278
|
+
|
|
279
|
+
Being in the `INUSE` state restricts certain edits while the pipeline is `STOPPED`.
|
|
280
|
+
|
|
281
|
+
The pipeline remains in this state until the user invokes `/clear`, transitioning
|
|
282
|
+
it to `CLEARING`.
|
|
283
|
+
"""
|
|
284
|
+
|
|
285
|
+
CLEARING = 2
|
|
286
|
+
"""
|
|
287
|
+
The pipeline is in the process of becoming unbound from its storage resources.
|
|
288
|
+
|
|
289
|
+
If storage resources are configured to be deleted upon clearing, their deletion
|
|
290
|
+
occurs before transitioning to `CLEARED`. Otherwise, no actual work is required,
|
|
291
|
+
and the transition happens immediately.
|
|
292
|
+
|
|
293
|
+
If storage is not deleted during clearing, the responsibility to manage or delete
|
|
294
|
+
those resources lies with the user.
|
|
295
|
+
"""
|
|
296
|
+
|
|
297
|
+
@staticmethod
|
|
298
|
+
def from_str(value):
|
|
299
|
+
for member in StorageStatus:
|
|
300
|
+
if member.name.lower() == value.lower():
|
|
301
|
+
return member
|
|
302
|
+
raise ValueError(f"Unknown value '{value}' for enum {StorageStatus.__name__}")
|
|
303
|
+
|
|
304
|
+
def __eq__(self, other):
|
|
305
|
+
return self.value == other.value
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
class FaultToleranceModel(Enum):
|
|
309
|
+
"""
|
|
310
|
+
The fault tolerance model.
|
|
311
|
+
"""
|
|
312
|
+
|
|
313
|
+
AtLeastOnce = 1
|
|
314
|
+
"""
|
|
315
|
+
Each record is output at least once. Crashes may duplicate output, but
|
|
316
|
+
no input or output is dropped.
|
|
317
|
+
"""
|
|
318
|
+
|
|
319
|
+
ExactlyOnce = 2
|
|
320
|
+
"""
|
|
321
|
+
Each record is output exactly once. Crashes do not drop or duplicate
|
|
322
|
+
input or output.
|
|
323
|
+
"""
|
|
324
|
+
|
|
325
|
+
def __str__(self) -> str:
|
|
326
|
+
match self:
|
|
327
|
+
case FaultToleranceModel.AtLeastOnce:
|
|
328
|
+
return "at_least_once"
|
|
329
|
+
case FaultToleranceModel.ExactlyOnce:
|
|
330
|
+
return "exactly_once"
|
|
331
|
+
|
|
332
|
+
@staticmethod
|
|
333
|
+
def from_str(value):
|
|
334
|
+
for member in FaultToleranceModel:
|
|
335
|
+
if str(member) == value.lower():
|
|
336
|
+
return member
|
|
337
|
+
|
|
338
|
+
raise ValueError(
|
|
339
|
+
f"Unknown value '{value}' for enum {FaultToleranceModel.__name__}"
|
|
340
|
+
)
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
class PipelineFieldSelector(Enum):
|
|
344
|
+
ALL = "all"
|
|
345
|
+
"""Select all fields of a pipeline."""
|
|
346
|
+
|
|
347
|
+
STATUS = "status"
|
|
348
|
+
"""Select only the fields required to know the status of a pipeline."""
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
class BootstrapPolicy(Enum):
|
|
352
|
+
AWAIT_APPROVAL = "await_approval"
|
|
353
|
+
ALLOW = "allow"
|
|
354
|
+
REJECT = "reject"
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
class CompletionTokenStatus(Enum):
|
|
358
|
+
COMPLETE = "complete"
|
|
359
|
+
"""
|
|
360
|
+
Feldera has completed processing all inputs represented by this token.
|
|
361
|
+
"""
|
|
362
|
+
|
|
363
|
+
IN_PROGRESS = "inprogress"
|
|
364
|
+
"""
|
|
365
|
+
Feldera is still processing the inputs represented by this token.
|
|
366
|
+
"""
|
feldera/output_handler.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
|
+
|
|
2
3
|
from typing import Optional
|
|
4
|
+
from threading import Event
|
|
3
5
|
|
|
4
|
-
from queue import Queue
|
|
5
6
|
from feldera import FelderaClient
|
|
6
7
|
from feldera._callback_runner import CallbackRunner
|
|
7
8
|
|
|
@@ -12,7 +13,6 @@ class OutputHandler:
|
|
|
12
13
|
client: FelderaClient,
|
|
13
14
|
pipeline_name: str,
|
|
14
15
|
view_name: str,
|
|
15
|
-
queue: Optional[Queue],
|
|
16
16
|
):
|
|
17
17
|
"""
|
|
18
18
|
Initializes the output handler, but doesn't start it.
|
|
@@ -22,17 +22,26 @@ class OutputHandler:
|
|
|
22
22
|
self.client: FelderaClient = client
|
|
23
23
|
self.pipeline_name: str = pipeline_name
|
|
24
24
|
self.view_name: str = view_name
|
|
25
|
-
self.queue: Optional[Queue] = queue
|
|
26
25
|
self.buffer: list[pd.DataFrame] = []
|
|
26
|
+
self.exception: Optional[BaseException] = None
|
|
27
|
+
self.event = Event()
|
|
27
28
|
|
|
28
29
|
# the callback that is passed to the `CallbackRunner`
|
|
29
30
|
def callback(df: pd.DataFrame, _: int):
|
|
30
31
|
if not df.empty:
|
|
31
32
|
self.buffer.append(df)
|
|
32
33
|
|
|
34
|
+
def exception_callback(exception: BaseException):
|
|
35
|
+
self.exception = exception
|
|
36
|
+
|
|
33
37
|
# sets up the callback runner
|
|
34
38
|
self.handler = CallbackRunner(
|
|
35
|
-
self.client,
|
|
39
|
+
self.client,
|
|
40
|
+
self.pipeline_name,
|
|
41
|
+
self.view_name,
|
|
42
|
+
callback,
|
|
43
|
+
exception_callback,
|
|
44
|
+
self.event,
|
|
36
45
|
)
|
|
37
46
|
|
|
38
47
|
def start(self):
|
|
@@ -41,6 +50,7 @@ class OutputHandler:
|
|
|
41
50
|
"""
|
|
42
51
|
|
|
43
52
|
self.handler.start()
|
|
53
|
+
_ = self.event.wait()
|
|
44
54
|
|
|
45
55
|
def to_pandas(self, clear_buffer: bool = True):
|
|
46
56
|
"""
|
|
@@ -49,6 +59,8 @@ class OutputHandler:
|
|
|
49
59
|
:param clear_buffer: Whether to clear the buffer after getting the output.
|
|
50
60
|
"""
|
|
51
61
|
|
|
62
|
+
if self.exception is not None:
|
|
63
|
+
raise self.exception
|
|
52
64
|
if len(self.buffer) == 0:
|
|
53
65
|
return pd.DataFrame()
|
|
54
66
|
res = pd.concat(self.buffer, ignore_index=True)
|