feldera 0.100.0__tar.gz → 0.102.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of feldera might be problematic. Click here for more details.
- {feldera-0.100.0 → feldera-0.102.0}/PKG-INFO +45 -16
- feldera-0.102.0/README.md +129 -0
- {feldera-0.100.0 → feldera-0.102.0}/feldera/_callback_runner.py +8 -7
- {feldera-0.100.0 → feldera-0.102.0}/feldera/enums.py +35 -0
- {feldera-0.100.0 → feldera-0.102.0}/feldera/pipeline.py +10 -9
- {feldera-0.100.0 → feldera-0.102.0}/feldera/pipeline_builder.py +7 -5
- {feldera-0.100.0 → feldera-0.102.0}/feldera/rest/feldera_client.py +3 -3
- {feldera-0.100.0 → feldera-0.102.0}/feldera/runtime_config.py +23 -1
- {feldera-0.100.0 → feldera-0.102.0}/feldera.egg-info/PKG-INFO +45 -16
- {feldera-0.100.0 → feldera-0.102.0}/feldera.egg-info/SOURCES.txt +3 -3
- {feldera-0.100.0 → feldera-0.102.0}/pyproject.toml +1 -1
- feldera-0.102.0/tests/test_pipeline_builder.py +53 -0
- feldera-0.102.0/tests/test_shared_pipeline0.py +593 -0
- feldera-0.102.0/tests/test_shared_pipeline1.py +72 -0
- {feldera-0.100.0 → feldera-0.102.0}/tests/test_udf.py +0 -1
- feldera-0.100.0/README.md +0 -100
- feldera-0.100.0/tests/test_pipeline.py +0 -263
- feldera-0.100.0/tests/test_pipeline_builder.py +0 -1199
- feldera-0.100.0/tests/test_variant.py +0 -102
- {feldera-0.100.0 → feldera-0.102.0}/feldera/__init__.py +0 -0
- {feldera-0.100.0 → feldera-0.102.0}/feldera/_helpers.py +0 -0
- {feldera-0.100.0 → feldera-0.102.0}/feldera/output_handler.py +0 -0
- {feldera-0.100.0 → feldera-0.102.0}/feldera/rest/__init__.py +0 -0
- {feldera-0.100.0 → feldera-0.102.0}/feldera/rest/_helpers.py +0 -0
- {feldera-0.100.0 → feldera-0.102.0}/feldera/rest/_httprequests.py +0 -0
- {feldera-0.100.0 → feldera-0.102.0}/feldera/rest/config.py +0 -0
- {feldera-0.100.0 → feldera-0.102.0}/feldera/rest/errors.py +0 -0
- {feldera-0.100.0 → feldera-0.102.0}/feldera/rest/feldera_config.py +0 -0
- {feldera-0.100.0 → feldera-0.102.0}/feldera/rest/pipeline.py +0 -0
- {feldera-0.100.0 → feldera-0.102.0}/feldera/rest/sql_table.py +0 -0
- {feldera-0.100.0 → feldera-0.102.0}/feldera/rest/sql_view.py +0 -0
- {feldera-0.100.0 → feldera-0.102.0}/feldera/stats.py +0 -0
- {feldera-0.100.0 → feldera-0.102.0}/feldera.egg-info/dependency_links.txt +0 -0
- {feldera-0.100.0 → feldera-0.102.0}/feldera.egg-info/requires.txt +0 -0
- {feldera-0.100.0 → feldera-0.102.0}/feldera.egg-info/top_level.txt +0 -0
- {feldera-0.100.0 → feldera-0.102.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: feldera
|
|
3
|
-
Version: 0.100.0
|
|
3
|
+
Version: 0.102.0
|
|
4
4
|
Summary: The feldera python client
|
|
5
5
|
Author-email: Feldera Team <dev@feldera.com>
|
|
6
6
|
License: MIT
|
|
@@ -54,10 +54,11 @@ If you have cloned the Feldera repo, you can install the python SDK as follows:
|
|
|
54
54
|
pip install python/
|
|
55
55
|
```
|
|
56
56
|
|
|
57
|
-
Checkout the docs [here](./feldera/__init__.py) for an example on how to use the SDK.
|
|
58
|
-
|
|
59
57
|
## Documentation
|
|
60
58
|
|
|
59
|
+
The Python SDK documentation is available at
|
|
60
|
+
[Feldera Python SDK Docs](https://docs.feldera.com/python).
|
|
61
|
+
|
|
61
62
|
To build the html documentation run:
|
|
62
63
|
|
|
63
64
|
Ensure that you have sphinx installed. If not, install it using `pip install sphinx`.
|
|
@@ -77,27 +78,23 @@ To clean the build, run `make clean`.
|
|
|
77
78
|
To run unit tests:
|
|
78
79
|
|
|
79
80
|
```bash
|
|
80
|
-
|
|
81
|
+
cd python && python3 -m pytest tests/
|
|
81
82
|
```
|
|
82
83
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
tests, you'll also need a broker available at `localhost:9092` and
|
|
88
|
-
(from the pipelines) `redpanda:19092`. (To change those locations,
|
|
89
|
-
set the environment variables listed in `python/tests/__init__.py`.)
|
|
90
|
-
|
|
91
|
-
```bash
|
|
92
|
-
(cd python && python3 -m pytest tests)
|
|
93
|
-
```
|
|
84
|
+
- This will detect and run all test files that match the pattern `test_*.py` or
|
|
85
|
+
`*_test.py`.
|
|
86
|
+
- By default, the tests expect a running Feldera instance at `http://localhost:8080`.
|
|
87
|
+
To override the default endpoint, set the `FELDERA_BASE_URL` environment variable.
|
|
94
88
|
|
|
95
89
|
To run tests from a specific file:
|
|
96
90
|
|
|
97
91
|
```bash
|
|
98
|
-
(cd python && python3 -m
|
|
92
|
+
(cd python && python3 -m pytest ./tests/path-to-file.py)
|
|
99
93
|
```
|
|
100
94
|
|
|
95
|
+
#### Running Aggregate Tests
|
|
96
|
+
|
|
97
|
+
The aggregate tests validate end-to-end correctness of SQL functionality.
|
|
101
98
|
To run the aggregate tests use:
|
|
102
99
|
|
|
103
100
|
```bash
|
|
@@ -105,6 +102,38 @@ cd python
|
|
|
105
102
|
PYTHONPATH=`pwd` python3 ./tests/aggregate_tests/main.py
|
|
106
103
|
```
|
|
107
104
|
|
|
105
|
+
### Reducing Compilation Cycles
|
|
106
|
+
|
|
107
|
+
To reduce redundant compilation cycles during testing:
|
|
108
|
+
|
|
109
|
+
* **Inherit from `SharedTestPipeline`** instead of `unittest.TestCase`.
|
|
110
|
+
* **Define DDLs** (e.g., `CREATE TABLE`, `CREATE VIEW`) in the **docstring** of each test method.
|
|
111
|
+
* All DDLs from all test functions in the class are combined and compiled into a single pipeline.
|
|
112
|
+
* If a table or view is already defined in one test, it can be used directly in others without redefinition.
|
|
113
|
+
* Ensure that all table and view names are unique within the class.
|
|
114
|
+
* Use `@enterprise_only` on tests that require Enterprise features. Their DDLs will be skipped on OSS builds.
|
|
115
|
+
* Use `self.set_runtime_config(...)` to override the default pipeline config.
|
|
116
|
+
* Reset it at the end using `self.reset_runtime_config()`.
|
|
117
|
+
* Access the shared pipeline via `self.pipeline`.
|
|
118
|
+
|
|
119
|
+
#### Example
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
from tests.shared_test_pipeline import SharedTestPipeline
|
|
123
|
+
|
|
124
|
+
class TestAverage(SharedTestPipeline):
|
|
125
|
+
def test_average(self):
|
|
126
|
+
"""
|
|
127
|
+
CREATE TABLE students(id INT, name STRING);
|
|
128
|
+
CREATE MATERIALIZED VIEW v AS SELECT * FROM students;
|
|
129
|
+
"""
|
|
130
|
+
...
|
|
131
|
+
self.pipeline.start()
|
|
132
|
+
self.pipeline.input_pandas("students", df)
|
|
133
|
+
self.pipeline.wait_for_completion(True)
|
|
134
|
+
...
|
|
135
|
+
```
|
|
136
|
+
|
|
108
137
|
## Linting and formatting
|
|
109
138
|
|
|
110
139
|
Use [Ruff] to run the lint checks that will be executed by the
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# Feldera Python SDK
|
|
2
|
+
|
|
3
|
+
Feldera Python is the Feldera SDK for Python developers.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install feldera
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
### Installing from Github
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install git+https://github.com/feldera/feldera#subdirectory=python
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Similarly, to install from a specific branch:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
$ pip install git+https://github.com/feldera/feldera@{BRANCH_NAME}#subdirectory=python
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Replace `{BRANCH_NAME}` with the name of the branch you want to install from.
|
|
24
|
+
|
|
25
|
+
### Installing from Local Directory
|
|
26
|
+
|
|
27
|
+
If you have cloned the Feldera repo, you can install the python SDK as follows:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
# the Feldera Python SDK is present inside the python/ directory
|
|
31
|
+
pip install python/
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Documentation
|
|
35
|
+
|
|
36
|
+
The Python SDK documentation is available at
|
|
37
|
+
[Feldera Python SDK Docs](https://docs.feldera.com/python).
|
|
38
|
+
|
|
39
|
+
To build the html documentation run:
|
|
40
|
+
|
|
41
|
+
Ensure that you have sphinx installed. If not, install it using `pip install sphinx`.
|
|
42
|
+
|
|
43
|
+
Then run the following commands:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
cd docs
|
|
47
|
+
sphinx-apidoc -o . ../feldera
|
|
48
|
+
make html
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
To clean the build, run `make clean`.
|
|
52
|
+
|
|
53
|
+
## Testing
|
|
54
|
+
|
|
55
|
+
To run unit tests:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
cd python && python3 -m pytest tests/
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
- This will detect and run all test files that match the pattern `test_*.py` or
|
|
62
|
+
`*_test.py`.
|
|
63
|
+
- By default, the tests expect a running Feldera instance at `http://localhost:8080`.
|
|
64
|
+
To override the default endpoint, set the `FELDERA_BASE_URL` environment variable.
|
|
65
|
+
|
|
66
|
+
To run tests from a specific file:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
(cd python && python3 -m pytest ./tests/path-to-file.py)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
#### Running Aggregate Tests
|
|
73
|
+
|
|
74
|
+
The aggregate tests validate end-to-end correctness of SQL functionality.
|
|
75
|
+
To run the aggregate tests use:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
cd python
|
|
79
|
+
PYTHONPATH=`pwd` python3 ./tests/aggregate_tests/main.py
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Reducing Compilation Cycles
|
|
83
|
+
|
|
84
|
+
To reduce redundant compilation cycles during testing:
|
|
85
|
+
|
|
86
|
+
* **Inherit from `SharedTestPipeline`** instead of `unittest.TestCase`.
|
|
87
|
+
* **Define DDLs** (e.g., `CREATE TABLE`, `CREATE VIEW`) in the **docstring** of each test method.
|
|
88
|
+
* All DDLs from all test functions in the class are combined and compiled into a single pipeline.
|
|
89
|
+
* If a table or view is already defined in one test, it can be used directly in others without redefinition.
|
|
90
|
+
* Ensure that all table and view names are unique within the class.
|
|
91
|
+
* Use `@enterprise_only` on tests that require Enterprise features. Their DDLs will be skipped on OSS builds.
|
|
92
|
+
* Use `self.set_runtime_config(...)` to override the default pipeline config.
|
|
93
|
+
* Reset it at the end using `self.reset_runtime_config()`.
|
|
94
|
+
* Access the shared pipeline via `self.pipeline`.
|
|
95
|
+
|
|
96
|
+
#### Example
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from tests.shared_test_pipeline import SharedTestPipeline
|
|
100
|
+
|
|
101
|
+
class TestAverage(SharedTestPipeline):
|
|
102
|
+
def test_average(self):
|
|
103
|
+
"""
|
|
104
|
+
CREATE TABLE students(id INT, name STRING);
|
|
105
|
+
CREATE MATERIALIZED VIEW v AS SELECT * FROM students;
|
|
106
|
+
"""
|
|
107
|
+
...
|
|
108
|
+
self.pipeline.start()
|
|
109
|
+
self.pipeline.input_pandas("students", df)
|
|
110
|
+
self.pipeline.wait_for_completion(True)
|
|
111
|
+
...
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Linting and formatting
|
|
115
|
+
|
|
116
|
+
Use [Ruff] to run the lint checks that will be executed by the
|
|
117
|
+
precommit hook when a PR is submitted:
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
ruff check python/
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
To reformat the code in the same way as the precommit hook:
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
ruff format
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
[Ruff]: https://github.com/astral-sh/ruff
|
|
@@ -54,12 +54,12 @@ class CallbackRunner(Thread):
|
|
|
54
54
|
)
|
|
55
55
|
|
|
56
56
|
# by default, we assume that the pipeline has been started
|
|
57
|
-
ack
|
|
57
|
+
ack = _CallbackRunnerInstruction.PipelineStarted
|
|
58
58
|
|
|
59
59
|
# if there is Queue, we wait for the instruction to start the pipeline
|
|
60
60
|
# this means that we are listening to the pipeline before running it, therefore, all data should be received
|
|
61
61
|
if self.queue:
|
|
62
|
-
ack
|
|
62
|
+
ack = self.queue.get()
|
|
63
63
|
|
|
64
64
|
match ack:
|
|
65
65
|
# if the pipeline has actually been started, we start a listener
|
|
@@ -77,11 +77,12 @@ class CallbackRunner(Thread):
|
|
|
77
77
|
|
|
78
78
|
for chunk in gen_obj:
|
|
79
79
|
chunk: dict = chunk
|
|
80
|
-
data: list[dict] = chunk.get("json_data")
|
|
81
|
-
seq_no: int = chunk.get("sequence_number")
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
80
|
+
data: Optional[list[dict]] = chunk.get("json_data")
|
|
81
|
+
seq_no: Optional[int] = chunk.get("sequence_number")
|
|
82
|
+
if data is not None and seq_no is not None:
|
|
83
|
+
self.callback(
|
|
84
|
+
dataframe_from_response([data], self.schema), seq_no
|
|
85
|
+
)
|
|
85
86
|
|
|
86
87
|
if self.queue:
|
|
87
88
|
try:
|
|
@@ -276,3 +276,38 @@ class StorageStatus(Enum):
|
|
|
276
276
|
|
|
277
277
|
def __eq__(self, other):
|
|
278
278
|
return self.value == other.value
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
class FaultToleranceModel(Enum):
|
|
282
|
+
"""
|
|
283
|
+
The fault tolerance model.
|
|
284
|
+
"""
|
|
285
|
+
|
|
286
|
+
AtLeastOnce = 1
|
|
287
|
+
"""
|
|
288
|
+
Each record is output at least once. Crashes may duplicate output, but
|
|
289
|
+
no input or output is dropped.
|
|
290
|
+
"""
|
|
291
|
+
|
|
292
|
+
ExactlyOnce = 2
|
|
293
|
+
"""
|
|
294
|
+
Each record is output exactly once. Crashes do not drop or duplicate
|
|
295
|
+
input or output.
|
|
296
|
+
"""
|
|
297
|
+
|
|
298
|
+
def __str__(self) -> str:
|
|
299
|
+
match self:
|
|
300
|
+
case FaultToleranceModel.AtLeastOnce:
|
|
301
|
+
return "at_least_once"
|
|
302
|
+
case FaultToleranceModel.ExactlyOnce:
|
|
303
|
+
return "exactly_once"
|
|
304
|
+
|
|
305
|
+
@staticmethod
|
|
306
|
+
def from_str(value):
|
|
307
|
+
for member in FaultToleranceModel:
|
|
308
|
+
if str(member) == value.lower():
|
|
309
|
+
return member
|
|
310
|
+
|
|
311
|
+
raise ValueError(
|
|
312
|
+
f"Unknown value '{value}' for enum {FaultToleranceModel.__name__}"
|
|
313
|
+
)
|
|
@@ -145,7 +145,7 @@ class Pipeline:
|
|
|
145
145
|
:param data: The JSON encoded data to be pushed to the pipeline. The data should be in the form:
|
|
146
146
|
`{'col1': 'val1', 'col2': 'val2'}` or `[{'col1': 'val1', 'col2': 'val2'}, {'col1': 'val1', 'col2': 'val2'}]`
|
|
147
147
|
:param update_format: The update format of the JSON data to be pushed to the pipeline. Must be one of:
|
|
148
|
-
"raw", "insert_delete".
|
|
148
|
+
"raw", "insert_delete". https://docs.feldera.com/formats/json#the-insertdelete-format
|
|
149
149
|
:param force: `True` to push data even if the pipeline is paused. `False` by default.
|
|
150
150
|
|
|
151
151
|
:raises ValueError: If the update format is invalid.
|
|
@@ -180,7 +180,7 @@ class Pipeline:
|
|
|
180
180
|
All connectors are RUNNING by default.
|
|
181
181
|
|
|
182
182
|
Refer to the connector documentation for more information:
|
|
183
|
-
|
|
183
|
+
https://docs.feldera.com/connectors/#input-connector-orchestration
|
|
184
184
|
|
|
185
185
|
:param table_name: The name of the table that the connector is attached to.
|
|
186
186
|
:param connector_name: The name of the connector to pause.
|
|
@@ -199,7 +199,7 @@ class Pipeline:
|
|
|
199
199
|
All connectors are RUNNING by default.
|
|
200
200
|
|
|
201
201
|
Refer to the connector documentation for more information:
|
|
202
|
-
|
|
202
|
+
https://docs.feldera.com/connectors/#input-connector-orchestration
|
|
203
203
|
|
|
204
204
|
:param table_name: The name of the table that the connector is attached to.
|
|
205
205
|
:param connector_name: The name of the connector to resume.
|
|
@@ -473,10 +473,13 @@ metrics"""
|
|
|
473
473
|
pipeline to stop.
|
|
474
474
|
"""
|
|
475
475
|
|
|
476
|
-
|
|
477
|
-
for _, queue in
|
|
476
|
+
for view_queue in self.views_tx:
|
|
477
|
+
for _, queue in view_queue.items():
|
|
478
478
|
# sends a message to the callback runner to stop listening
|
|
479
479
|
queue.put(_CallbackRunnerInstruction.RanToCompletion)
|
|
480
|
+
|
|
481
|
+
if len(self.views_tx) > 0:
|
|
482
|
+
for view_name, queue in self.views_tx.pop().items():
|
|
480
483
|
# block until the callback runner has been stopped
|
|
481
484
|
queue.join()
|
|
482
485
|
|
|
@@ -530,15 +533,13 @@ metrics"""
|
|
|
530
533
|
|
|
531
534
|
def checkpoint(self, wait: bool = False, timeout_s=300) -> int:
|
|
532
535
|
"""
|
|
533
|
-
Checkpoints this pipeline
|
|
534
|
-
Fault Tolerance in Feldera:
|
|
535
|
-
<https://docs.feldera.com/pipelines/fault-tolerance/>
|
|
536
|
+
Checkpoints this pipeline.
|
|
536
537
|
|
|
537
538
|
:param wait: If true, will block until the checkpoint completes.
|
|
538
539
|
:param timeout_s: The maximum time (in seconds) to wait for the
|
|
539
540
|
checkpoint to complete.
|
|
540
541
|
|
|
541
|
-
:raises FelderaAPIError: If
|
|
542
|
+
:raises FelderaAPIError: If enterprise features are not enabled.
|
|
542
543
|
"""
|
|
543
544
|
|
|
544
545
|
seq = self.client.checkpoint_pipeline(self.name)
|
|
@@ -2,7 +2,7 @@ from feldera.rest.feldera_client import FelderaClient
|
|
|
2
2
|
from feldera.rest.pipeline import Pipeline as InnerPipeline
|
|
3
3
|
from feldera.pipeline import Pipeline
|
|
4
4
|
from feldera.enums import CompilationProfile
|
|
5
|
-
from feldera.runtime_config import RuntimeConfig
|
|
5
|
+
from feldera.runtime_config import RuntimeConfig
|
|
6
6
|
from feldera.rest.errors import FelderaAPIError
|
|
7
7
|
|
|
8
8
|
|
|
@@ -10,14 +10,16 @@ class PipelineBuilder:
|
|
|
10
10
|
"""
|
|
11
11
|
A builder for creating a Feldera Pipeline.
|
|
12
12
|
|
|
13
|
-
:param client: The
|
|
13
|
+
:param client: The :class:`.FelderaClient` instance
|
|
14
14
|
:param name: The name of the pipeline
|
|
15
15
|
:param description: The description of the pipeline
|
|
16
16
|
:param sql: The SQL code of the pipeline
|
|
17
17
|
:param udf_rust: Rust code for UDFs
|
|
18
18
|
:param udf_toml: Rust dependencies required by UDFs (in the TOML format)
|
|
19
|
-
:param compilation_profile: The
|
|
20
|
-
:param runtime_config: The
|
|
19
|
+
:param compilation_profile: The :class:`.CompilationProfile` to use
|
|
20
|
+
:param runtime_config: The :class:`.RuntimeConfig` to use. Enables
|
|
21
|
+
configuring the runtime behavior of the pipeline such as:
|
|
22
|
+
fault tolerance, storage and :class:`.Resources`
|
|
21
23
|
"""
|
|
22
24
|
|
|
23
25
|
def __init__(
|
|
@@ -29,7 +31,7 @@ class PipelineBuilder:
|
|
|
29
31
|
udf_toml: str = "",
|
|
30
32
|
description: str = "",
|
|
31
33
|
compilation_profile: CompilationProfile = CompilationProfile.OPTIMIZED,
|
|
32
|
-
runtime_config: RuntimeConfig = RuntimeConfig(
|
|
34
|
+
runtime_config: RuntimeConfig = RuntimeConfig.default(),
|
|
33
35
|
):
|
|
34
36
|
self.client: FelderaClient = client
|
|
35
37
|
self.name: str | None = name
|
|
@@ -404,7 +404,7 @@ Reason: The pipeline is in a STOPPED state due to the following error:
|
|
|
404
404
|
|
|
405
405
|
def checkpoint_pipeline(self, pipeline_name: str) -> int:
|
|
406
406
|
"""
|
|
407
|
-
Checkpoint a
|
|
407
|
+
Checkpoint a pipeline.
|
|
408
408
|
|
|
409
409
|
:param pipeline_name: The name of the pipeline to checkpoint
|
|
410
410
|
"""
|
|
@@ -454,11 +454,11 @@ Reason: The pipeline is in a STOPPED state due to the following error:
|
|
|
454
454
|
pipeline_name: str,
|
|
455
455
|
table_name: str,
|
|
456
456
|
format: str,
|
|
457
|
-
data: list[list | str | dict] | dict,
|
|
457
|
+
data: list[list | str | dict] | dict | str,
|
|
458
458
|
array: bool = False,
|
|
459
459
|
force: bool = False,
|
|
460
460
|
update_format: str = "raw",
|
|
461
|
-
json_flavor: str = None,
|
|
461
|
+
json_flavor: Optional[str] = None,
|
|
462
462
|
serialize: bool = True,
|
|
463
463
|
):
|
|
464
464
|
"""
|
|
@@ -1,4 +1,6 @@
|
|
|
1
|
+
import os
|
|
1
2
|
from typing import Optional, Any, Mapping
|
|
3
|
+
from feldera.enums import FaultToleranceModel
|
|
2
4
|
|
|
3
5
|
|
|
4
6
|
class Resources:
|
|
@@ -58,6 +60,11 @@ class Storage:
|
|
|
58
60
|
class RuntimeConfig:
|
|
59
61
|
"""
|
|
60
62
|
Runtime configuration class to define the configuration for a pipeline.
|
|
63
|
+
To create runtime config from a dictionary, use
|
|
64
|
+
:meth:`.RuntimeConfig.from_dict`.
|
|
65
|
+
|
|
66
|
+
Documentation:
|
|
67
|
+
https://docs.feldera.com/pipelines/configuration/#runtime-configuration
|
|
61
68
|
"""
|
|
62
69
|
|
|
63
70
|
def __init__(
|
|
@@ -72,6 +79,9 @@ class RuntimeConfig:
|
|
|
72
79
|
clock_resolution_usecs: Optional[int] = None,
|
|
73
80
|
provisioning_timeout_secs: Optional[int] = None,
|
|
74
81
|
resources: Optional[Resources] = None,
|
|
82
|
+
runtime_version: Optional[str] = None,
|
|
83
|
+
fault_tolerance_model: Optional[FaultToleranceModel] = None,
|
|
84
|
+
checkpoint_interval_secs: Optional[int] = None,
|
|
75
85
|
):
|
|
76
86
|
self.workers = workers
|
|
77
87
|
self.tracing = tracing
|
|
@@ -81,6 +91,14 @@ class RuntimeConfig:
|
|
|
81
91
|
self.min_batch_size_records = min_batch_size_records
|
|
82
92
|
self.clock_resolution_usecs = clock_resolution_usecs
|
|
83
93
|
self.provisioning_timeout_secs = provisioning_timeout_secs
|
|
94
|
+
self.runtime_version = runtime_version or os.environ.get(
|
|
95
|
+
"FELDERA_RUNTIME_VERSION"
|
|
96
|
+
)
|
|
97
|
+
if fault_tolerance_model is not None:
|
|
98
|
+
self.fault_tolerance = {
|
|
99
|
+
"model": str(fault_tolerance_model),
|
|
100
|
+
"checkpoint_interval_secs": checkpoint_interval_secs,
|
|
101
|
+
}
|
|
84
102
|
if resources is not None:
|
|
85
103
|
self.resources = resources.__dict__
|
|
86
104
|
if isinstance(storage, bool):
|
|
@@ -88,10 +106,14 @@ class RuntimeConfig:
|
|
|
88
106
|
if isinstance(storage, Storage):
|
|
89
107
|
self.storage = storage.__dict__
|
|
90
108
|
|
|
109
|
+
@staticmethod
def default() -> "RuntimeConfig":
    """
    Build a :class:`.RuntimeConfig` whose only explicit argument is a
    default-constructed :class:`.Resources`.
    """
    default_resources = Resources()
    return RuntimeConfig(resources=default_resources)
|
|
112
|
+
|
|
91
113
|
@classmethod
|
|
92
114
|
def from_dict(cls, d: Mapping[str, Any]):
|
|
93
115
|
"""
|
|
94
|
-
Create a
|
|
116
|
+
Create a :class:`.RuntimeConfig` object from a dictionary.
|
|
95
117
|
"""
|
|
96
118
|
|
|
97
119
|
conf = cls()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: feldera
|
|
3
|
-
Version: 0.100.0
|
|
3
|
+
Version: 0.102.0
|
|
4
4
|
Summary: The feldera python client
|
|
5
5
|
Author-email: Feldera Team <dev@feldera.com>
|
|
6
6
|
License: MIT
|
|
@@ -54,10 +54,11 @@ If you have cloned the Feldera repo, you can install the python SDK as follows:
|
|
|
54
54
|
pip install python/
|
|
55
55
|
```
|
|
56
56
|
|
|
57
|
-
Checkout the docs [here](./feldera/__init__.py) for an example on how to use the SDK.
|
|
58
|
-
|
|
59
57
|
## Documentation
|
|
60
58
|
|
|
59
|
+
The Python SDK documentation is available at
|
|
60
|
+
[Feldera Python SDK Docs](https://docs.feldera.com/python).
|
|
61
|
+
|
|
61
62
|
To build the html documentation run:
|
|
62
63
|
|
|
63
64
|
Ensure that you have sphinx installed. If not, install it using `pip install sphinx`.
|
|
@@ -77,27 +78,23 @@ To clean the build, run `make clean`.
|
|
|
77
78
|
To run unit tests:
|
|
78
79
|
|
|
79
80
|
```bash
|
|
80
|
-
|
|
81
|
+
cd python && python3 -m pytest tests/
|
|
81
82
|
```
|
|
82
83
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
tests, you'll also need a broker available at `localhost:9092` and
|
|
88
|
-
(from the pipelines) `redpanda:19092`. (To change those locations,
|
|
89
|
-
set the environment variables listed in `python/tests/__init__.py`.)
|
|
90
|
-
|
|
91
|
-
```bash
|
|
92
|
-
(cd python && python3 -m pytest tests)
|
|
93
|
-
```
|
|
84
|
+
- This will detect and run all test files that match the pattern `test_*.py` or
|
|
85
|
+
`*_test.py`.
|
|
86
|
+
- By default, the tests expect a running Feldera instance at `http://localhost:8080`.
|
|
87
|
+
To override the default endpoint, set the `FELDERA_BASE_URL` environment variable.
|
|
94
88
|
|
|
95
89
|
To run tests from a specific file:
|
|
96
90
|
|
|
97
91
|
```bash
|
|
98
|
-
(cd python && python3 -m
|
|
92
|
+
(cd python && python3 -m pytest ./tests/path-to-file.py)
|
|
99
93
|
```
|
|
100
94
|
|
|
95
|
+
#### Running Aggregate Tests
|
|
96
|
+
|
|
97
|
+
The aggregate tests validate end-to-end correctness of SQL functionality.
|
|
101
98
|
To run the aggregate tests use:
|
|
102
99
|
|
|
103
100
|
```bash
|
|
@@ -105,6 +102,38 @@ cd python
|
|
|
105
102
|
PYTHONPATH=`pwd` python3 ./tests/aggregate_tests/main.py
|
|
106
103
|
```
|
|
107
104
|
|
|
105
|
+
### Reducing Compilation Cycles
|
|
106
|
+
|
|
107
|
+
To reduce redundant compilation cycles during testing:
|
|
108
|
+
|
|
109
|
+
* **Inherit from `SharedTestPipeline`** instead of `unittest.TestCase`.
|
|
110
|
+
* **Define DDLs** (e.g., `CREATE TABLE`, `CREATE VIEW`) in the **docstring** of each test method.
|
|
111
|
+
* All DDLs from all test functions in the class are combined and compiled into a single pipeline.
|
|
112
|
+
* If a table or view is already defined in one test, it can be used directly in others without redefinition.
|
|
113
|
+
* Ensure that all table and view names are unique within the class.
|
|
114
|
+
* Use `@enterprise_only` on tests that require Enterprise features. Their DDLs will be skipped on OSS builds.
|
|
115
|
+
* Use `self.set_runtime_config(...)` to override the default pipeline config.
|
|
116
|
+
* Reset it at the end using `self.reset_runtime_config()`.
|
|
117
|
+
* Access the shared pipeline via `self.pipeline`.
|
|
118
|
+
|
|
119
|
+
#### Example
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
from tests.shared_test_pipeline import SharedTestPipeline
|
|
123
|
+
|
|
124
|
+
class TestAverage(SharedTestPipeline):
|
|
125
|
+
def test_average(self):
|
|
126
|
+
"""
|
|
127
|
+
CREATE TABLE students(id INT, name STRING);
|
|
128
|
+
CREATE MATERIALIZED VIEW v AS SELECT * FROM students;
|
|
129
|
+
"""
|
|
130
|
+
...
|
|
131
|
+
self.pipeline.start()
|
|
132
|
+
self.pipeline.input_pandas("students", df)
|
|
133
|
+
self.pipeline.wait_for_completion(True)
|
|
134
|
+
...
|
|
135
|
+
```
|
|
136
|
+
|
|
108
137
|
## Linting and formatting
|
|
109
138
|
|
|
110
139
|
Use [Ruff] to run the lint checks that will be executed by the
|
|
@@ -24,7 +24,7 @@ feldera/rest/feldera_config.py
|
|
|
24
24
|
feldera/rest/pipeline.py
|
|
25
25
|
feldera/rest/sql_table.py
|
|
26
26
|
feldera/rest/sql_view.py
|
|
27
|
-
tests/test_pipeline.py
|
|
28
27
|
tests/test_pipeline_builder.py
|
|
29
|
-
tests/test_udf.py
|
|
30
|
-
tests/test_variant.py
|
|
28
|
+
tests/test_shared_pipeline0.py
|
|
29
|
+
tests/test_shared_pipeline1.py
|
|
30
|
+
tests/test_udf.py
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
from tests import TEST_CLIENT
|
|
3
|
+
from feldera import PipelineBuilder
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TestPipelineBuilder(unittest.TestCase):
    """
    Tests for pipelines created through :class:`PipelineBuilder`.
    """

    def _connector_paused(self, pipeline_name: str, endpoint_name: str):
        """
        Return the ``paused`` flag reported for an input endpoint.

        Reads the pipeline stats through ``TEST_CLIENT`` and fails the test
        with a readable message if no input endpoint has the given name.

        :param pipeline_name: The name of the pipeline to query.
        :param endpoint_name: The endpoint name as listed in the stats,
            e.g. ``"numbers.c1"``.
        """
        stats = TEST_CLIENT.get_pipeline_stats(pipeline_name)
        for item in stats["inputs"]:
            if item["endpoint_name"] == endpoint_name:
                return item["paused"]

        self.fail(
            f"input endpoint '{endpoint_name}' not found in the stats of "
            f"pipeline '{pipeline_name}'"
        )

    def test_connector_orchestration(self):
        """
        Resuming and pausing a connector is reflected in the pipeline stats.

        The connector ``c1`` is declared with ``"paused": true``; the test
        resumes it, checks that the stats report it as not paused, pauses it
        again and checks that the stats report it as paused.
        """
        sql = """
        CREATE TABLE numbers (
          num INT
        ) WITH (
            'connectors' = '[
                {
                    "name": "c1",
                    "paused": true,
                    "transport": {
                        "name": "datagen",
                        "config": {"plan": [{ "rate": 1, "fields": { "num": { "range": [0, 10], "strategy": "uniform" } } }]}
                    }
                }
            ]'
        );
        """

        name = "test_connector_orchestration"

        pipeline = PipelineBuilder(TEST_CLIENT, name, sql=sql).create_or_replace()
        try:
            pipeline.start()

            pipeline.resume_connector("numbers", "c1")
            self.assertFalse(self._connector_paused(name, "numbers.c1"))

            pipeline.pause_connector("numbers", "c1")
            self.assertTrue(self._connector_paused(name, "numbers.c1"))
        finally:
            # Always tear the pipeline down, so a failed assertion does not
            # leave it running for later tests.
            pipeline.stop(force=True)
            pipeline.clear_storage()
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
if __name__ == "__main__":
|
|
53
|
+
unittest.main()
|