feldera 0.34.1__tar.gz

This diff represents the content of a publicly available package version released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.

Potentially problematic release: this version of feldera has been flagged as possibly problematic.

@@ -0,0 +1,105 @@
+ Metadata-Version: 2.2
+ Name: feldera
+ Version: 0.34.1
+ Summary: The feldera python client
+ Author-email: Abhinav <abhinav.gyawali@feldera.com>
+ License: MIT
+ Project-URL: Homepage, https://www.feldera.com
+ Project-URL: Documentation, https://docs.feldera.com/python
+ Project-URL: Repository, https://github.com/feldera/feldera
+ Project-URL: Issues, https://github.com/feldera/feldera/issues
+ Keywords: feldera,python
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ Requires-Dist: requests
+ Requires-Dist: pandas
+ Requires-Dist: typing-extensions
+ Requires-Dist: numpy<2
+ Requires-Dist: pretty-errors
+ Requires-Dist: ruff>=0.6.9
+
+ # Feldera Python SDK
+
+ `feldera` is the Feldera SDK for Python developers.
+
+ ## Installation
+
+ ```bash
+ pip install feldera
+ ```
+
+ ### Installing from GitHub
+
+ ```bash
+ pip install git+https://github.com/feldera/feldera#subdirectory=python
+ ```
+
+ Similarly, to install from a specific branch:
+
+ ```bash
+ pip install git+https://github.com/feldera/feldera@{BRANCH_NAME}#subdirectory=python
+ ```
+
+ Replace `{BRANCH_NAME}` with the name of the branch you want to install from.
+
+ ### Installing from a Local Directory
+
+ If you have cloned the Feldera repo, you can install the Python SDK as follows:
+
+ ```bash
+ # the Feldera Python SDK is present inside the python/ directory
+ pip install python/
+ ```
+
+ Check out the docs [here](./feldera/__init__.py) for an example of how to use the SDK.
+
+ ## Documentation
+
+ To build the HTML documentation, ensure that Sphinx is installed (install it with `pip install sphinx` if needed), then run the following commands:
+
+ ```bash
+ cd docs
+ sphinx-apidoc -o . ../feldera
+ make html
+ ```
+
+ To clean the build, run `make clean`.
+
+ ## Testing
+
+ To run unit tests:
+
+ ```bash
+ (cd python && python3 -m unittest)
+ ```
+
+ The following command runs end-to-end tests. You'll need a pipeline
+ manager running at `http://localhost:8080`. For the pipeline builder
+ tests, you'll also need a broker available at `localhost:9092` and
+ (from the pipelines) `redpanda:19092`. (To change those locations,
+ set the environment variables listed in `python/tests/__init__.py`.)
+
+ ```bash
+ (cd python/tests && python3 -m pytest .)
+ ```
+
+ To run tests from a specific file:
+
+ ```bash
+ (cd python/tests && python3 -m unittest ./tests/path-to-file.py)
+ ```
+
+ To run the aggregate tests, use:
+
+ ```bash
+ cd python
+ PYTHONPATH=`pwd` python3 ./tests/aggregate_tests/main.py
+ ```
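
The README above points to `feldera/__init__.py` for a usage example. As a quick orientation, here is a minimal sketch of driving a pipeline with the classes this package exports (`FelderaClient`, `PipelineBuilder`, `Pipeline`). The endpoint and the method names (`create_or_replace`, `start`, `shutdown`) are assumptions based on Feldera's published documentation, not taken from this diff.

```python
# Minimal sketch; endpoint and method names are assumptions, and a running
# Feldera pipeline manager is required for this to do anything.
from feldera import FelderaClient, PipelineBuilder

# Connect to a locally running pipeline manager (the same address the tests assume).
client = FelderaClient("http://localhost:8080")

sql = """
CREATE TABLE numbers (x INT);
CREATE VIEW doubled AS SELECT x * 2 AS y FROM numbers;
"""

# Build (or replace) a pipeline from the SQL program and run it.
pipeline = PipelineBuilder(client, name="example", sql=sql).create_or_replace()
pipeline.start()
# ... feed data into `numbers` and read the `doubled` view via the Pipeline API ...
pipeline.shutdown()
```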
@@ -0,0 +1,11 @@
+ from feldera.rest.feldera_client import FelderaClient as FelderaClient
+ from feldera.pipeline import Pipeline as Pipeline
+ from feldera.pipeline_builder import PipelineBuilder as PipelineBuilder
+
+ import pretty_errors
+
+ pretty_errors.configure(
+     line_number_first=True,
+ )
+
+ pretty_errors.activate()
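
A side note on the file above: because `__init__.py` re-exports the client, pipeline, and builder classes and activates `pretty_errors` on import, user code only needs the top-level package. A trivial sketch of what that implies:

```python
# Importing the top-level package is enough: the public classes are re-exported
# here, and pretty_errors traceback formatting is configured as a side effect.
from feldera import FelderaClient, Pipeline, PipelineBuilder

print(FelderaClient.__module__)    # feldera.rest.feldera_client
print(Pipeline.__module__)         # feldera.pipeline
print(PipelineBuilder.__module__)  # feldera.pipeline_builder
```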
@@ -0,0 +1,116 @@
+ from enum import Enum
+ from threading import Thread
+ from typing import Callable, Optional
+ from queue import Queue, Empty
+
+ import pandas as pd
+ from feldera import FelderaClient
+ from feldera._helpers import dataframe_from_response
+
+
+ class _CallbackRunnerInstruction(Enum):
+     PipelineStarted = 1
+     RanToCompletion = 2
+
+
+ class CallbackRunner(Thread):
+     def __init__(
+         self,
+         client: FelderaClient,
+         pipeline_name: str,
+         view_name: str,
+         callback: Callable[[pd.DataFrame, int], None],
+         queue: Optional[Queue],
+     ):
+         super().__init__()
+         self.daemon = True
+         self.client: FelderaClient = client
+         self.pipeline_name: str = pipeline_name
+         self.view_name: str = view_name
+         self.callback: Callable[[pd.DataFrame, int], None] = callback
+         self.queue: Optional[Queue] = queue
+         self.schema: Optional[dict] = None
+
+     def run(self):
+         """
+         The main loop of the thread. Listens for data and calls the callback function on each chunk of data received.
+
+         :meta private:
+         """
+
+         pipeline = self.client.get_pipeline(self.pipeline_name)
+         schema = pipeline.program_info["schema"]
+
+         if schema:
+             schemas = [relation for relation in schema["inputs"] + schema["outputs"]]
+             for schema in schemas:
+                 if schema["name"] == self.view_name:
+                     self.schema = schema
+                     break
+
+         if self.schema is None:
+             raise ValueError(
+                 f"Table or View {self.view_name} not found in the pipeline schema."
+             )
+
+         # by default, we assume that the pipeline has been started
+         ack: _CallbackRunnerInstruction = _CallbackRunnerInstruction.PipelineStarted
+
+         # if there is a Queue, we wait for the instruction to start the pipeline;
+         # this means that we are listening to the pipeline before running it, therefore all data should be received
+         if self.queue:
+             ack: _CallbackRunnerInstruction = self.queue.get()
+
+         match ack:
+             # if the pipeline has actually been started, we start a listener
+             case _CallbackRunnerInstruction.PipelineStarted:
+                 # listen to the pipeline
+                 gen_obj = self.client.listen_to_pipeline(
+                     self.pipeline_name, self.view_name, format="json"
+                 )
+
+                 # if there is a queue set up, inform the main thread that the listener has been started, and it can
+                 # proceed with starting the pipeline
+                 if self.queue:
+                     # stop blocking the main thread on `join` for the previous message
+                     self.queue.task_done()
+
+                 for chunk in gen_obj:
+                     chunk: dict = chunk
+                     data: list[dict] = chunk.get("json_data")
+                     seq_no: int = chunk.get("sequence_number")
+
+                     if data is not None:
+                         self.callback(dataframe_from_response([data], schema), seq_no)
+
+                     if self.queue:
+                         try:
+                             # in a non-blocking way, check if the queue has received further instructions;
+                             # this should be a RanToCompletion instruction, which means that the pipeline has
+                             # completed
+                             again_ack = self.queue.get_nowait()
+
+                             # if the queue has received a message
+                             if again_ack:
+                                 match again_ack:
+                                     case _CallbackRunnerInstruction.RanToCompletion:
+                                         # stop blocking the main thread on `join` and return from this thread
+                                         self.queue.task_done()
+
+                                         return
+
+                                     case _CallbackRunnerInstruction.PipelineStarted:
+                                         # if the pipeline has been started again, which shouldn't happen,
+                                         # ignore it and continue listening; call `task_done` to avoid blocking
+                                         # the main thread on `join`
+                                         self.queue.task_done()
+
+                                         continue
+                         except Empty:
+                             # if the queue is empty, continue listening
+                             continue
+
+             case _CallbackRunnerInstruction.RanToCompletion:
+                 if self.queue:
+                     self.queue.task_done()
+                 return
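
The comments in the file above describe a two-message handshake over the optional `Queue`: the caller enqueues `PipelineStarted`, waits on `join()` until the listener is attached, starts the pipeline, and later enqueues `RanToCompletion` so the thread can exit. The sketch below only restates that protocol from the caller's side; in practice the SDK's `Pipeline` class does this wiring internally, a running Feldera instance with an existing pipeline is required, and the names `my_pipeline`/`my_view` are placeholders.

```python
# Sketch of the queue handshake described by the comments above; pipeline and
# view names are placeholders, and a live Feldera instance is assumed.
from queue import Queue

from feldera import FelderaClient
from feldera._callback_runner import CallbackRunner, _CallbackRunnerInstruction


def on_chunk(df, seq_no):
    print(f"chunk {seq_no}: {len(df)} rows")


client = FelderaClient("http://localhost:8080")  # assumed local endpoint
queue = Queue()
runner = CallbackRunner(client, "my_pipeline", "my_view", on_chunk, queue)
runner.start()

# Tell the runner the pipeline is about to start, then wait until it has
# attached its listener (the runner calls task_done() once it is listening).
queue.put(_CallbackRunnerInstruction.PipelineStarted)
queue.join()

# ... start the pipeline here via the SDK, let it process data ...

# Once the pipeline has finished, let the runner thread return.
queue.put(_CallbackRunnerInstruction.RanToCompletion)
queue.join()
```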
@@ -0,0 +1,104 @@
+ import pandas as pd
+ from decimal import Decimal
+
+
+ def sql_type_to_pandas_type(sql_type: str):
+     """
+     Converts a SQL type to a pandas type.
+     """
+
+     match sql_type.upper():
+         case "BOOLEAN":
+             return "boolean"
+         case "TINYINT":
+             return "Int8"
+         case "SMALLINT":
+             return "Int16"
+         case "INTEGER":
+             return "Int32"
+         case "BIGINT":
+             return "Int64"
+         case "REAL":
+             return "Float32"
+         case "DOUBLE":
+             return "Float64"
+         case "DECIMAL":
+             return None
+         case "CHAR":
+             return "str"
+         case "VARCHAR":
+             return "str"
+         case "DATE" | "TIMESTAMP":
+             return "datetime64[ns]"
+         case "TIME" | "INTERVAL":
+             return "timedelta64[ns]"
+         case "ARRAY":
+             return None
+         case "NULL":
+             return None
+         case "BINARY" | "VARBINARY":
+             return None
+         case "STRUCT" | "MAP":
+             return None
+
+
+ def ensure_dataframe_has_columns(df: pd.DataFrame):
+     """
+     Ensures that the DataFrame has column names set.
+     """
+
+     if [v for v in range(df.shape[1])] == list(df.columns):
+         raise ValueError(
+             """
+             DataFrame has no column names set.
+             Input DataFrame must have column names set and they must be consistent with the columns in the input table.
+             """
+         )
+
+
+ def dataframe_from_response(buffer: list[list[dict]], schema: dict):
+     """
+     Converts the response from Feldera to a pandas DataFrame.
+     """
+
+     pd_schema = {}
+
+     decimal_col = []
+
+     for column in schema["fields"]:
+         column_name = column["name"]
+         if not column["case_sensitive"]:
+             column_name = column_name.lower()
+         column_type = column["columntype"]["type"]
+         if column_type == "DECIMAL":
+             decimal_col.append(column_name)
+
+         pd_schema[column_name] = sql_type_to_pandas_type(column_type)
+
+     data = [
+         {**item["insert"], "insert_delete": 1}
+         if "insert" in item
+         else {**item["delete"], "insert_delete": -1}
+         for sublist in buffer
+         for item in sublist
+     ]
+
+     if len(decimal_col) != 0:
+         for datum in data:
+             for col in decimal_col:
+                 if datum[col] is not None:
+                     datum[col] = Decimal(datum[col])
+
+     df = pd.DataFrame(data)
+     df = df.astype(pd_schema)
+
+     return df
+
+
+ def chunk_dataframe(df, chunk_size=1000):
+     """
+     Yield successive n-sized chunks from the given dataframe.
+     """
+
+     for i in range(0, len(df), chunk_size):
+         yield df.iloc[i : i + chunk_size]
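
To make the shape of `dataframe_from_response` concrete, here is a small sketch with a hand-written relation schema and a buffer of insert/delete events, mirroring only the fields the function above actually reads (`name`, `case_sensitive`, `columntype.type`); the schema and data values are hypothetical, not captured from a real pipeline.

```python
# Hypothetical inputs shaped the way dataframe_from_response expects them.
from feldera._helpers import dataframe_from_response

schema = {
    "fields": [
        {"name": "id", "case_sensitive": False,
         "columntype": {"type": "BIGINT"}},
        {"name": "price", "case_sensitive": False,
         "columntype": {"type": "DECIMAL"}},
    ]
}

buffer = [
    [
        {"insert": {"id": 1, "price": "9.99"}},
        {"delete": {"id": 2, "price": "1.50"}},
    ]
]

df = dataframe_from_response(buffer, schema)
print(df)
# Expected: columns id, price, insert_delete, where insert_delete is
# +1 for inserted rows and -1 for deleted rows.
```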
@@ -0,0 +1,234 @@
+ from enum import Enum
+ from typing import Optional
+
+
+ class CompilationProfile(Enum):
+     """
+     The compilation profile to use when compiling the program.
+     """
+
+     SERVER_DEFAULT = None
+     """
+     The compiler server's default compilation profile.
+     """
+
+     DEV = "dev"
+     """
+     The development compilation profile.
+     """
+
+     UNOPTIMIZED = "unoptimized"
+     """
+     The unoptimized compilation profile.
+     """
+
+     OPTIMIZED = "optimized"
+     """
+     The optimized compilation profile, the default for this API.
+     """
+
+
+ class BuildMode(Enum):
+     CREATE = 1
+     GET = 2
+     GET_OR_CREATE = 3
+
+
+ class PipelineStatus(Enum):
+     """
+     Represents the state that this pipeline is currently in.
+
+     .. code-block:: text
+
+                  Shutdown ◄────┐
+                     │          │
+            /deploy  │          │
+                     │   ⌛ShuttingDown
+                     ▼          ▲
+              ⌛Provisioning     │
+                     │          │
+               Provisioned      │
+                     ▼          │/shutdown
+              ⌛Initializing     │
+                     │          │
+            ┌────────┴─────────┴─┐
+            │        ▼           │
+            │      Paused        │
+            │      │    ▲        │
+            │/start│    │/pause  │
+            │      ▼    │        │
+            │      Running       │
+            └──────────┬─────────┘
+
+
+                     Failed
+     """
+
+     NOT_FOUND = 1
+     """
+     The pipeline has not been created yet.
+     """
+
+     SHUTDOWN = 2
+     """
+     The pipeline has not been started or has been shut down.
+
+     The pipeline remains in this state until the user triggers
+     a deployment by invoking the `/deploy` endpoint.
+     """
+
+     PROVISIONING = 3
+     """
+     The runner triggered a deployment of the pipeline and is
+     waiting for the pipeline HTTP server to come up.
+
+     In this state, the runner provisions a runtime for the pipeline,
+     starts the pipeline within this runtime and waits for it to start accepting HTTP requests.
+
+     The user is unable to communicate with the pipeline during this
+     time. The pipeline remains in this state until:
+
+     1. Its HTTP server is up and running; the pipeline transitions to the
+        `PipelineStatus.INITIALIZING` state.
+     2. A pre-defined timeout has passed. The runner performs a forced
+        shutdown of the pipeline; returns to the `PipelineStatus.SHUTDOWN` state.
+     3. The user cancels the pipeline by invoking the `/shutdown` endpoint.
+        The manager performs a forced shutdown of the pipeline; returns to the
+        `PipelineStatus.SHUTDOWN` state.
+
+     """
+
+     INITIALIZING = 4
+     """
+     The pipeline is initializing its internal state and connectors.
+
+     This state is part of the pipeline's deployment process. In this state,
+     the pipeline's HTTP server is up and running, but its query engine
+     and input and output connectors are still initializing.
+
+     The pipeline remains in this state until:
+
+     1. Initialization completes successfully; the pipeline transitions to the
+        `PipelineStatus.PAUSED` state.
+     2. Initialization fails; transitions to the `PipelineStatus.FAILED` state.
+     3. A pre-defined timeout has passed. The runner performs a forced
+        shutdown of the pipeline; returns to the `PipelineStatus.SHUTDOWN` state.
+     4. The user cancels the pipeline by invoking the `/shutdown` endpoint.
+        The manager performs a forced shutdown of the pipeline; returns to the
+        `PipelineStatus.SHUTDOWN` state.
+
+     """
+
+     PAUSED = 5
+     """
+     The pipeline is fully initialized, but data processing has been paused.
+
+     The pipeline remains in this state until:
+
+     1. The user starts the pipeline by invoking the `/start` endpoint. The
+        manager passes the request to the pipeline; transitions to the
+        `PipelineStatus.RUNNING` state.
+     2. The user cancels the pipeline by invoking the `/shutdown` endpoint.
+        The manager passes the shutdown request to the pipeline to perform a
+        graceful shutdown; transitions to the `PipelineStatus.SHUTTING_DOWN` state.
+     3. An unexpected runtime error renders the pipeline `PipelineStatus.FAILED`.
+
+     """
+
+     RUNNING = 6
+     """
+     The pipeline is processing data.
+
+     The pipeline remains in this state until:
+
+     1. The user pauses the pipeline by invoking the `/pause` endpoint. The
+        manager passes the request to the pipeline; transitions to the
+        `PipelineStatus.PAUSED` state.
+     2. The user cancels the pipeline by invoking the `/shutdown` endpoint.
+        The runner passes the shutdown request to the pipeline to perform a
+        graceful shutdown; transitions to the
+        `PipelineStatus.SHUTTING_DOWN` state.
+     3. An unexpected runtime error renders the pipeline
+        `PipelineStatus.FAILED`.
+
+     """
+
+     SHUTTING_DOWN = 7
+     """
+     Graceful shutdown in progress.
+
+     In this state, the pipeline finishes any ongoing data processing,
+     produces final outputs, shuts down input/output connectors and
+     terminates.
+
+     The pipeline remains in this state until:
+
+     1. Shutdown completes successfully; transitions to the `PipelineStatus.SHUTDOWN` state.
+     2. A pre-defined timeout has passed. The manager performs a forced shutdown of the pipeline; returns to the
+        `PipelineStatus.SHUTDOWN` state.
+
+     """
+
+     FAILED = 8
+     """
+     The pipeline remains in this state until the user acknowledges the failure
+     by issuing a call to shut down the pipeline; transitions to the
+     `PipelineStatus.SHUTDOWN` state.
+     """
+
+     UNAVAILABLE = 9
+     """
+     The pipeline was initialized at least once, but in the most recent status check it
+     either could not be reached or reported that it is not yet ready.
+     """
+
+     @staticmethod
+     def from_str(value):
+         for member in PipelineStatus:
+             if member.name.lower() == value.lower():
+                 return member
+         raise ValueError(f"Unknown value '{value}' for enum {PipelineStatus.__name__}")
+
+     def __eq__(self, other):
+         return self.value == other.value
+
+
+ class ProgramStatus(Enum):
+     Pending = 1
+     CompilingSql = 2
+     SqlCompiled = 3
+     CompilingRust = 4
+     Success = 5
+     SqlError = 6
+     RustError = 7
+     SystemError = 8
+
+     def __init__(self, value):
+         self.error: Optional[dict] = None
+         self._value_ = value
+
+     @staticmethod
+     def from_value(value):
+         error = None
+         if isinstance(value, dict):
+             error = value
+             value = list(value.keys())[0]
+
+         for member in ProgramStatus:
+             if member.name.lower() == value.lower():
+                 member.error = error
+                 return member
+         raise ValueError(f"Unknown value '{value}' for enum {ProgramStatus.__name__}")
+
+     def __eq__(self, other):
+         return self.value == other.value
+
+     def __str__(self):
+         return self.name + (f": ({self.error})" if self.error else "")
+
+     def get_error(self) -> Optional[dict]:
+         """
+         Returns the compilation error, if any.
+         """
+
+         return self.error
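
As a small illustration of the two parsing helpers above: `PipelineStatus.from_str` matches a status name case-insensitively, while `ProgramStatus.from_value` also accepts a single-key dict whose key names the status and whose payload is kept as the error. The literal status strings and the error-dict shape below are assumptions about what the manager's API returns, inferred only from the parsing code itself.

```python
# Hypothetical status values, shaped the way from_str / from_value parse them.
from feldera.enums import PipelineStatus, ProgramStatus

status = PipelineStatus.from_str("running")
print(status)                            # PipelineStatus.RUNNING

ok = ProgramStatus.from_value("CompilingRust")
print(ok)                                # CompilingRust

# A failed compilation arrives as a single-key dict; the key names the status
# and the whole dict is kept around as the error payload.
failed = ProgramStatus.from_value({"SqlError": [{"message": "syntax error"}]})
print(failed == ProgramStatus.SqlError)  # True
print(failed.get_error())                # {'SqlError': [{'message': 'syntax error'}]}
```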