feldera 0.128.0__tar.gz → 0.129.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: the registry flags this version of feldera as potentially problematic.
- {feldera-0.128.0 → feldera-0.129.0}/PKG-INFO +18 -7
- {feldera-0.128.0 → feldera-0.129.0}/README.md +17 -6
- {feldera-0.128.0 → feldera-0.129.0}/feldera/pipeline.py +19 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/_httprequests.py +15 -5
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/config.py +3 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/feldera_client.py +31 -4
- {feldera-0.128.0 → feldera-0.129.0}/feldera/stats.py +3 -3
- {feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/PKG-INFO +18 -7
- {feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/SOURCES.txt +1 -0
- {feldera-0.128.0 → feldera-0.129.0}/pyproject.toml +1 -1
- feldera-0.129.0/tests/test_checkpoint_sync.py +320 -0
- feldera-0.129.0/tests/test_issue4457.py +57 -0
- {feldera-0.128.0 → feldera-0.129.0}/tests/test_shared_pipeline.py +11 -0
- feldera-0.128.0/tests/test_checkpoint_sync.py +0 -319
- {feldera-0.128.0 → feldera-0.129.0}/feldera/__init__.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/_callback_runner.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/_helpers.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/enums.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/output_handler.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/pipeline_builder.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/__init__.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/_helpers.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/errors.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/feldera_config.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/pipeline.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/sql_table.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/sql_view.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/runtime_config.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/dependency_links.txt +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/requires.txt +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/top_level.txt +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/setup.cfg +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/tests/test_pipeline_builder.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/tests/test_shared_pipeline_stress.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/tests/test_udf.py +0 -0
{feldera-0.128.0 → feldera-0.129.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: feldera
-Version: 0.128.0
+Version: 0.129.0
 Summary: The feldera python client
 Author-email: Feldera Team <dev@feldera.com>
 License: MIT
@@ -28,19 +28,19 @@ Feldera Python is the Feldera SDK for Python developers.
 ## Installation

 ```bash
-pip install feldera
+uv pip install feldera
 ```

 ### Installing from Github

 ```bash
-pip install git+https://github.com/feldera/feldera#subdirectory=python
+uv pip install git+https://github.com/feldera/feldera#subdirectory=python
 ```

 Similarly, to install from a specific branch:

 ```bash
-
+uv pip install git+https://github.com/feldera/feldera@{BRANCH_NAME}#subdirectory=python
 ```

 Replace `{BRANCH_NAME}` with the name of the branch you want to install from.
@@ -51,7 +51,12 @@ If you have cloned the Feldera repo, you can install the python SDK as follows:

 ```bash
 # the Feldera Python SDK is present inside the python/ directory
-
+cd python
+# If you don't have a virtual environment, create one
+uv venv
+source .venv/activate
+# Install the SDK in editable mode
+uv pip install .
 ```

 ## Documentation
@@ -61,7 +66,7 @@ The Python SDK documentation is available at

 To build the html documentation run:

-Ensure that you have sphinx installed. If not, install it using `pip install sphinx`.
+Ensure that you have sphinx installed. If not, install it using `uv pip install sphinx`.

 Then run the following commands:

@@ -92,7 +97,13 @@ To run tests from a specific file:
 (cd python && python3 -m pytest ./tests/path-to-file.py)
 ```

-
+To run a specific test:
+
+```bash
+uv run python -m pytest tests/test_shared_pipeline.py::TestPipeline::test_adhoc_query_hash -v
+```
+
+#### Running All Tests

 The tests validate end-to-end correctness of SQL functionality. To
 run the tests use:
{feldera-0.128.0 → feldera-0.129.0}/README.md

@@ -5,19 +5,19 @@ Feldera Python is the Feldera SDK for Python developers.
 ## Installation

 ```bash
-pip install feldera
+uv pip install feldera
 ```

 ### Installing from Github

 ```bash
-pip install git+https://github.com/feldera/feldera#subdirectory=python
+uv pip install git+https://github.com/feldera/feldera#subdirectory=python
 ```

 Similarly, to install from a specific branch:

 ```bash
-
+uv pip install git+https://github.com/feldera/feldera@{BRANCH_NAME}#subdirectory=python
 ```

 Replace `{BRANCH_NAME}` with the name of the branch you want to install from.
@@ -28,7 +28,12 @@ If you have cloned the Feldera repo, you can install the python SDK as follows:

 ```bash
 # the Feldera Python SDK is present inside the python/ directory
-
+cd python
+# If you don't have a virtual environment, create one
+uv venv
+source .venv/activate
+# Install the SDK in editable mode
+uv pip install .
 ```

 ## Documentation
@@ -38,7 +43,7 @@ The Python SDK documentation is available at

 To build the html documentation run:

-Ensure that you have sphinx installed. If not, install it using `pip install sphinx`.
+Ensure that you have sphinx installed. If not, install it using `uv pip install sphinx`.

 Then run the following commands:

@@ -69,7 +74,13 @@ To run tests from a specific file:
 (cd python && python3 -m pytest ./tests/path-to-file.py)
 ```

-
+To run a specific test:
+
+```bash
+uv run python -m pytest tests/test_shared_pipeline.py::TestPipeline::test_adhoc_query_hash -v
+```
+
+#### Running All Tests

 The tests validate end-to-end correctness of SQL functionality. To
 run the tests use:
{feldera-0.128.0 → feldera-0.129.0}/feldera/pipeline.py

@@ -809,6 +809,25 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""

         return self.client.query_as_text(self.name, query)

+    def query_hash(self, query: str):
+        """
+        Executes an ad-hoc SQL query on this pipeline and returns the result
+        as a hash of the result set. This is useful for quickly checking
+        if the result set has changed without retrieving the entire result.
+
+        Note:
+            For a stable hash, the query must be deterministic which means
+            it should be sorted.
+
+        :param query: The SQL query to be executed.
+
+        :raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
+            state.
+        :raises FelderaAPIError: If querying a non materialized table or view.
+        :raises FelderaAPIError: If the query is invalid.
+        """
+        return self.client.query_as_hash(self.name, query)
+
     def execute(self, query: str):
         """
         Executes an ad-hoc SQL query on the current pipeline, discarding its
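For context, this is roughly how the new `Pipeline.query_hash` helper could be used from application code. A minimal sketch, assuming a locally running Feldera instance at `http://localhost:8080`; the pipeline name and schema are made up for illustration, while the SDK calls themselves (`PipelineBuilder`, `input_json`, `wait_for_idle`, `query_hash`) all appear elsewhere in this diff.

```python
# Minimal usage sketch of the new Pipeline.query_hash (URL, names and schema are
# assumptions for illustration; only query_hash itself is new in this release).
from feldera import PipelineBuilder
from feldera.rest.feldera_client import FelderaClient

client = FelderaClient("http://localhost:8080")
sql = """
CREATE TABLE t (id INT);
CREATE MATERIALIZED VIEW v AS SELECT * FROM t;
"""
pipeline = PipelineBuilder(client, name="hash_demo", sql=sql).create_or_replace()
pipeline.start()

pipeline.input_json("t", [{"id": 1}, {"id": 2}])
pipeline.wait_for_idle()

# ORDER BY keeps the result deterministic, which the docstring above requires
# for a stable hash.
h1 = pipeline.query_hash("SELECT * FROM v ORDER BY id")
h2 = pipeline.query_hash("SELECT * FROM v ORDER BY id")
assert h1 == h2  # unchanged data, identical digest

pipeline.stop(force=True)
```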
{feldera-0.128.0 → feldera-0.129.0}/feldera/rest/_httprequests.py

@@ -57,6 +57,7 @@ class HttpRequests:
             self.headers["Content-Type"] = content_type

         try:
+            conn_timeout = self.config.connection_timeout
             timeout = self.config.timeout
             headers = self.headers

@@ -74,7 +75,7 @@ class HttpRequests:
                 if http_method.__name__ == "get":
                     request = http_method(
                         request_path,
-                        timeout=timeout,
+                        timeout=(conn_timeout, timeout),
                         headers=headers,
                         params=params,
                         stream=stream,
@@ -83,7 +84,7 @@ class HttpRequests:
                 elif isinstance(body, bytes):
                     request = http_method(
                         request_path,
-                        timeout=timeout,
+                        timeout=(conn_timeout, timeout),
                         headers=headers,
                         data=body,
                         params=params,
@@ -93,7 +94,7 @@ class HttpRequests:
                 else:
                     request = http_method(
                         request_path,
-                        timeout=timeout,
+                        timeout=(conn_timeout, timeout),
                         headers=headers,
                         data=json_serialize(body) if serialize else body,
                         params=params,
@@ -118,9 +119,18 @@ class HttpRequests:
                     time.sleep(2)  # backoff, adjust as needed
                     continue
                 raise  # re-raise for all other errors or if out of retries
+            except requests.exceptions.Timeout as err:
+                if attempt < max_retries:
+                    logging.warning(
+                        "HTTP Connection Timeout for %s, retrying (%d/%d)...",
+                        path,
+                        attempt + 1,
+                        max_retries,
+                    )
+                    time.sleep(2)
+                    continue
+                raise FelderaTimeoutError(str(err)) from err

-            except requests.exceptions.Timeout as err:
-                raise FelderaTimeoutError(str(err)) from err
             except requests.exceptions.ConnectionError as err:
                 raise FelderaCommunicationError(str(err)) from err

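The `timeout=(conn_timeout, timeout)` change uses the tuple form supported by the `requests` library, where the first element bounds connection establishment and the second bounds waiting for response data. A standalone illustration of that behavior, with a placeholder URL:

```python
# (connect, read) timeout tuple as used in the diff above: fail fast if the
# server cannot be reached, but allow long-running responses once connected.
import requests

resp = requests.get(
    "http://localhost:8080/v0/pipelines",  # placeholder endpoint
    timeout=(3.0, 30.0),  # 3s to establish the connection, 30s per read
)
print(resp.status_code)
```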
{feldera-0.128.0 → feldera-0.129.0}/feldera/rest/config.py

@@ -12,6 +12,7 @@ class Config:
         api_key: Optional[str] = None,
         version: Optional[str] = None,
         timeout: Optional[float] = None,
+        connection_timeout: Optional[float] = None,
         requests_verify: bool = True,
     ) -> None:
         """
@@ -19,6 +20,7 @@ class Config:
         :param api_key: The optional API key to access Feldera
         :param version: The version of the API to use
         :param timeout: The timeout for the HTTP requests
+        :param connection_timeout: The connection timeout for the HTTP requests
         :param requests_verify: The `verify` parameter passed to the requests
             library. `True` by default.
         """
@@ -27,4 +29,5 @@ class Config:
         self.api_key: Optional[str] = api_key
         self.version: Optional[str] = version or "v0"
         self.timeout: Optional[float] = timeout
+        self.connection_timeout: Optional[float] = connection_timeout
         self.requests_verify: bool = requests_verify
{feldera-0.128.0 → feldera-0.129.0}/feldera/rest/feldera_client.py

@@ -46,20 +46,26 @@ class FelderaClient:
         url: str,
         api_key: Optional[str] = None,
         timeout: Optional[float] = None,
+        connection_timeout: Optional[float] = None,
         requests_verify: bool = True,
     ) -> None:
         """
         :param url: The url to Feldera API (ex: https://try.feldera.com)
         :param api_key: The optional API key for Feldera
-        :param timeout: (optional) The amount of time in seconds that the
-            will wait for a response before timing
-
+        :param timeout: (optional) The amount of time in seconds that the
+            client will wait for a response before timing out.
+        :param connection_timeout: (optional) The amount of time in seconds that
+            the client will wait to establish connection before timing out.
         :param requests_verify: The `verify` parameter passed to the requests
             library. `True` by default.
         """

         self.config = Config(
-            url,
+            url,
+            api_key,
+            timeout=timeout,
+            connection_timeout=connection_timeout,
+            requests_verify=requests_verify,
         )
         self.http = HttpRequests(self.config)

@@ -895,6 +901,27 @@ Reason: The pipeline is in a STOPPED state due to the following error:
                 if chunk:
                     yield chunk.decode("utf-8")

+    def query_as_hash(self, pipeline_name: str, query: str) -> str:
+        """
+        Executes an ad-hoc query on the specified pipeline and returns a hash of the result.
+
+        :param pipeline_name: The name of the pipeline to query.
+        :param query: The SQL query to be executed.
+        :return: A string containing the hash of the query result.
+        """
+        params = {
+            "pipeline_name": pipeline_name,
+            "sql": query,
+            "format": "hash",
+        }
+
+        resp = self.http.get(
+            path=f"/pipelines/{pipeline_name}/query",
+            params=params,
+            stream=False,
+        )
+        return resp
+
     def query_as_parquet(self, pipeline_name: str, query: str, path: str):
         """
         Executes an ad-hoc query on the specified pipeline and saves the result to a parquet file.
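Putting the two client-side additions together: the new `connection_timeout` constructor argument and `query_as_hash`. A hedged sketch; the URL, pipeline and view names are placeholders, and the pipeline is assumed to already be running with a materialized view.

```python
# Sketch combining connection_timeout and query_as_hash (both added in 0.129.0).
# URL and object names are placeholders.
from feldera.rest.feldera_client import FelderaClient

client = FelderaClient(
    "http://localhost:8080",
    timeout=60,            # seconds to wait for a response
    connection_timeout=5,  # seconds to wait while establishing the connection
)

digest = client.query_as_hash("my_pipeline", "SELECT * FROM my_view ORDER BY 1")
print(digest)  # uppercase hex digest of the result set, per the test below
```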
{feldera-0.128.0 → feldera-0.129.0}/feldera/stats.py

@@ -26,8 +26,8 @@ class PipelineStatistics:
         pipeline.inputs = [
             InputEndpointStatus.from_dict(input) for input in d["inputs"]
         ]
-        pipeline.
-            OutputEndpointStatus
+        pipeline.outputs = [
+            OutputEndpointStatus.from_dict(output) for output in d["outputs"]
         ]
         return pipeline

@@ -46,7 +46,7 @@ class GlobalPipelineMetrics:
         self.rss_bytes: Optional[int] = None
         self.cpu_msecs: Optional[int] = None
         self.start_time: Optional[datetime] = None
-        self.incarnation_uuid: Optional[uuid] = None
+        self.incarnation_uuid: Optional[uuid.UUID] = None
         self.storage_bytes: Optional[int] = None
         self.storage_mb_secs: Optional[int] = None
         self.runtime_elapsed_msecs: Optional[int] = None
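The second hunk is a type-annotation fix: `uuid` is the module, while the stored value is a `uuid.UUID` instance. A tiny illustration of the distinction, borrowing the attribute name from the code above:

```python
# uuid (module) vs uuid.UUID (type): only the latter is a meaningful annotation
# for a UUID value such as incarnation_uuid above.
import uuid
from typing import Optional

incarnation_uuid: Optional[uuid.UUID] = None
incarnation_uuid = uuid.uuid4()                 # a uuid.UUID instance
print(isinstance(incarnation_uuid, uuid.UUID))  # True
```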
{feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: feldera
-Version: 0.128.0
+Version: 0.129.0
 Summary: The feldera python client
 Author-email: Feldera Team <dev@feldera.com>
 License: MIT
@@ -28,19 +28,19 @@ Feldera Python is the Feldera SDK for Python developers.
 ## Installation

 ```bash
-pip install feldera
+uv pip install feldera
 ```

 ### Installing from Github

 ```bash
-pip install git+https://github.com/feldera/feldera#subdirectory=python
+uv pip install git+https://github.com/feldera/feldera#subdirectory=python
 ```

 Similarly, to install from a specific branch:

 ```bash
-
+uv pip install git+https://github.com/feldera/feldera@{BRANCH_NAME}#subdirectory=python
 ```

 Replace `{BRANCH_NAME}` with the name of the branch you want to install from.
@@ -51,7 +51,12 @@ If you have cloned the Feldera repo, you can install the python SDK as follows:

 ```bash
 # the Feldera Python SDK is present inside the python/ directory
-
+cd python
+# If you don't have a virtual environment, create one
+uv venv
+source .venv/activate
+# Install the SDK in editable mode
+uv pip install .
 ```

 ## Documentation
@@ -61,7 +66,7 @@ The Python SDK documentation is available at

 To build the html documentation run:

-Ensure that you have sphinx installed. If not, install it using `pip install sphinx`.
+Ensure that you have sphinx installed. If not, install it using `uv pip install sphinx`.

 Then run the following commands:

@@ -92,7 +97,13 @@ To run tests from a specific file:
 (cd python && python3 -m pytest ./tests/path-to-file.py)
 ```

-
+To run a specific test:
+
+```bash
+uv run python -m pytest tests/test_shared_pipeline.py::TestPipeline::test_adhoc_query_hash -v
+```
+
+#### Running All Tests

 The tests validate end-to-end correctness of SQL functionality. To
 run the tests use:
feldera-0.129.0/tests/test_checkpoint_sync.py (new file)

@@ -0,0 +1,320 @@
+# from tests.shared_test_pipeline import SharedTestPipeline
+# from tests import enterprise_only
+# from feldera.runtime_config import RuntimeConfig, Storage
+# from feldera.enums import PipelineStatus, FaultToleranceModel
+# from typing import Optional
+# import os
+# import sys
+# import time
+# from uuid import uuid4
+# import random
+# import pytest
+#
+#
+# DEFAULT_ENDPOINT = os.environ.get(
+#     "DEFAULT_MINIO_ENDPOINT", "http://minio.extra.svc.cluster.local:9000"
+# )
+# DEFAULT_BUCKET = "default"
+# ACCESS_KEY = "minio"
+# SECRET_KEY = "miniopasswd"
+#
+#
+# def storage_cfg(
+#     pipeline_name: str,
+#     endpoint: Optional[str] = None,
+#     start_from_checkpoint: Optional[str] = None,
+#     strict: bool = False,
+#     auth_err: bool = False,
+#     standby: bool = False,
+#     pull_interval: int = 2,
+# ) -> dict:
+#     return {
+#         "backend": {
+#             "name": "file",
+#             "config": {
+#                 "sync": {
+#                     "bucket": f"{DEFAULT_BUCKET}/{pipeline_name}",
+#                     "access_key": ACCESS_KEY,
+#                     "secret_key": SECRET_KEY if not auth_err else SECRET_KEY + "extra",
+#                     "provider": "Minio",
+#                     "endpoint": endpoint or DEFAULT_ENDPOINT,
+#                     "start_from_checkpoint": start_from_checkpoint,
+#                     "fail_if_no_checkpoint": strict,
+#                     "standby": standby,
+#                     "pull_interval": pull_interval,
+#                 }
+#             },
+#         }
+#     }
+#
+#
+# class TestCheckpointSync(SharedTestPipeline):
+#     @enterprise_only
+#     def test_checkpoint_sync(
+#         self,
+#         from_uuid: bool = False,
+#         random_uuid: bool = False,
+#         clear_storage: bool = True,
+#         auth_err: bool = False,
+#         strict: bool = False,
+#         expect_empty: bool = False,
+#         standby: bool = False,
+#     ):
+#         """
+#         CREATE TABLE t0 (c0 INT, c1 VARCHAR);
+#         CREATE MATERIALIZED VIEW v0 AS SELECT * FROM t0;
+#         """
+#
+#         storage_config = storage_cfg(self.pipeline.name)
+#         ft = FaultToleranceModel.AtLeastOnce
+#
+#         self.pipeline.set_runtime_config(
+#             RuntimeConfig(
+#                 fault_tolerance_model=ft, storage=Storage(config=storage_config)
+#             )
+#         )
+#         self.pipeline.start()
+#
+#         random.seed(time.time())
+#         total = random.randint(10, 20)
+#         data = [{"c0": i, "c1": str(i)} for i in range(1, total)]
+#         self.pipeline.input_json("t0", data)
+#         self.pipeline.execute("INSERT INTO t0 VALUES (21, 'exists')")
+#
+#         start = time.time()
+#         timeout = 5
+#
+#         while True:
+#             processed = self.pipeline.stats().global_metrics.total_processed_records
+#             if processed == total:
+#                 break
+#
+#             if time.time() - start > timeout:
+#                 raise TimeoutError(
+#                     f"timed out while waiting for pipeline to process {total} records"
+#                 )
+#
+#             time.sleep(0.1)
+#
+#         got_before = list(self.pipeline.query("SELECT * FROM v0"))
+#         print(f"{self.pipeline.name}: records: {total}, {got_before}", file=sys.stderr)
+#
+#         if len(got_before) != processed:
+#             raise RuntimeError(
+#                 f"adhoc query returned {len(got_before)} but {processed} records were processed: {got_before}"
+#             )
+#
+#         self.pipeline.checkpoint(wait=True)
+#         uuid = self.pipeline.sync_checkpoint(wait=True)
+#
+#         self.pipeline.stop(force=True)
+#
+#         if clear_storage:
+#             self.pipeline.clear_storage()
+#
+#         if random_uuid:
+#             uuid = uuid4()
+#
+#         # Restart pipeline from checkpoint
+#         storage_config = storage_cfg(
+#             pipeline_name=self.pipeline.name,
+#             start_from_checkpoint=uuid if from_uuid else "latest",
+#             auth_err=auth_err,
+#             strict=strict,
+#             standby=standby,
+#         )
+#         self.pipeline.set_runtime_config(
+#             RuntimeConfig(
+#                 fault_tolerance_model=ft, storage=Storage(config=storage_config)
+#             )
+#         )
+#
+#         if not standby:
+#             self.pipeline.start()
+#         else:
+#             self.pipeline.start(wait=False)
+#
+#         # wait for the pipeline to initialize
+#         start = time.monotonic()
+#         # wait for a maximum of 120 seconds for the pipeline to provison
+#         end = start + 120
+#
+#         # wait for the pipeline to finish provisoning
+#         for log in self.pipeline.logs():
+#             if "checkpoint pulled successfully" in log:
+#                 break
+#
+#             if time.monotonic() > end:
+#                 raise TimeoutError(
+#                     f"{self.pipeline.name} timedout waiting to pull checkpoint"
+#                 )
+#
+#         if standby:
+#             # wait for 8 seconds, this should be more than enough time
+#             time.sleep(8)
+#             assert self.pipeline.status() == PipelineStatus.INITIALIZING
+#
+#             self.pipeline.activate(timeout_s=10)
+#
+#         got_after = list(self.pipeline.query("SELECT * FROM v0"))
+#
+#         print(
+#             f"{self.pipeline.name}: after: {len(got_after)}, {got_after}",
+#             file=sys.stderr,
+#         )
+#
+#         if expect_empty:
+#             got_before = []
+#
+#         self.assertCountEqual(got_before, got_after)
+#
+#         self.pipeline.stop(force=True)
+#
+#         if clear_storage:
+#             self.pipeline.clear_storage()
+#
+#     @enterprise_only
+#     def test_from_uuid(self):
+#         self.test_checkpoint_sync(from_uuid=True)
+#
+#     @enterprise_only
+#     def test_without_clearing_storage(self):
+#         self.test_checkpoint_sync(clear_storage=False)
+#
+#     @enterprise_only
+#     def test_autherr_fail(self):
+#         with self.assertRaisesRegex(RuntimeError, "SignatureDoesNotMatch"):
+#             self.test_checkpoint_sync(auth_err=True, strict=True)
+#
+#     @enterprise_only
+#     def test_autherr(self):
+#         self.test_checkpoint_sync(auth_err=True, strict=False, expect_empty=True)
+#
+#     @enterprise_only
+#     def test_nonexistent_checkpoint_fail(self):
+#         with self.assertRaisesRegex(RuntimeError, "were not found in source"):
+#             self.test_checkpoint_sync(random_uuid=True, from_uuid=True, strict=True)
+#
+#     @enterprise_only
+#     def test_nonexistent_checkpoint(self):
+#         self.test_checkpoint_sync(random_uuid=True, from_uuid=True, expect_empty=True)
+#
+#     @enterprise_only
+#     def test_standby_activation(self):
+#         self.test_checkpoint_sync(standby=True)
+#
+#     @enterprise_only
+#     def test_standby_activation_from_uuid(self):
+#         self.test_checkpoint_sync(standby=True, from_uuid=True)
+#
+#     @enterprise_only
+#     def test_standby_fallback(self, from_uuid: bool = False):
+#         # Step 1: Start main pipeline
+#         storage_config = storage_cfg(self.pipeline.name)
+#         ft = FaultToleranceModel.AtLeastOnce
+#         self.pipeline.set_runtime_config(
+#             RuntimeConfig(
+#                 fault_tolerance_model=ft, storage=Storage(config=storage_config)
+#             )
+#         )
+#         self.pipeline.start()
+#
+#         # Insert initial data
+#         random.seed(time.time())
+#         total_initial = random.randint(10, 20)
+#         data_initial = [{"c0": i, "c1": str(i)} for i in range(1, total_initial)]
+#         self.pipeline.input_json("t0", data_initial)
+#         self.pipeline.execute("INSERT INTO t0 VALUES (21, 'exists')")
+#         self.pipeline.wait_for_completion()
+#
+#         got_before = list(self.pipeline.query("select * from v0"))
+#
+#         # Step 2: Create checkpoint and sync
+#         self.pipeline.checkpoint(wait=True)
+#         uuid = self.pipeline.sync_checkpoint(wait=True)
+#
+#         # Step 3: Start standby pipeline
+#         standby = self.new_pipeline_with_suffix("standby")
+#         pull_interval = 1
+#         standby.set_runtime_config(
+#             RuntimeConfig(
+#                 fault_tolerance_model=ft,
+#                 storage=Storage(
+#                     config=storage_cfg(
+#                         self.pipeline.name,
+#                         start_from_checkpoint=uuid if from_uuid else "latest",
+#                         standby=True,
+#                         pull_interval=pull_interval,
+#                     )
+#                 ),
+#             )
+#         )
+#         standby.start(wait=False)
+#
+#         # Wait until standby pulls the first checkpoint
+#         start = time.monotonic()
+#         end = start + 120
+#         for log in standby.logs():
+#             if "checkpoint pulled successfully" in log:
+#                 break
+#             if time.monotonic() > end:
+#                 raise TimeoutError(
+#                     "Timed out waiting for standby pipeline to pull checkpoint"
+#                 )
+#
+#         # Step 4: Add more data and make 3-10 checkpoints
+#         extra_ckpts = random.randint(3, 10)
+#         total_additional = 0
+#
+#         for i in range(extra_ckpts):
+#             new_val = 100 + i
+#             new_data = [{"c0": new_val, "c1": f"extra_{new_val}"}]
+#             self.pipeline.input_json("t0", new_data)
+#             self.pipeline.wait_for_completion()
+#             total_additional += 1
+#             self.pipeline.checkpoint(wait=True)
+#             self.pipeline.sync_checkpoint(wait=True)
+#             time.sleep(0.2)
+#
+#         got_expected = (
+#             got_before if from_uuid else list(self.pipeline.query("SELECT * FROM v0"))
+#         )
+#         print(
+#             f"{self.pipeline.name}: final records before shutdown: {got_expected}",
+#             file=sys.stderr,
+#         )
+#
+#         # Step 5: Stop main and activate standby
+#         self.pipeline.stop(force=True)
+#
+#         assert standby.status() == PipelineStatus.INITIALIZING
+#         standby.activate(timeout_s=(pull_interval * extra_ckpts) + 60)
+#
+#         for log in standby.logs():
+#             if "activated" in log:
+#                 break
+#             if time.monotonic() > end:
+#                 raise TimeoutError("Timed out waiting for standby pipeline to activate")
+#
+#         # Step 6: Validate standby has all expected records
+#         got_after = list(standby.query("SELECT * FROM v0"))
+#         print(
+#             f"{standby.name}: final records after activation: {got_after}",
+#             file=sys.stderr,
+#         )
+#         self.assertCountEqual(got_expected, got_after)
+#
+#         # Cleanup
+#         standby.stop(force=True)
+#
+#         standby.start()
+#         got_final = list(standby.query("SELECT * FROM v0"))
+#         standby.stop(force=True)
+#
+#         self.assertCountEqual(got_after, got_final)
+#
+#         self.pipeline.clear_storage()
+#
+#     @enterprise_only
+#     def test_standby_fallback_from_uuid(self):
+#         self.test_standby_fallback(from_uuid=True)
feldera-0.129.0/tests/test_issue4457.py (new file)

@@ -0,0 +1,57 @@
+import unittest
+
+from pandas import Timestamp
+from feldera import PipelineBuilder
+from tests import TEST_CLIENT
+
+
+class TestIssue_4457(unittest.TestCase):
+    def test_local(self):
+        sql = """
+        CREATE TABLE test_events (
+            id VARCHAR NOT NULL PRIMARY KEY,
+            a VARCHAR,
+            t TIMESTAMP NOT NULL LATENESS INTERVAL 1 MINUTE
+        );
+        CREATE VIEW V AS SELECT * FROM test_events;
+        """
+
+        pipeline = PipelineBuilder(
+            TEST_CLIENT, name="test_issue4457", sql=sql
+        ).create_or_replace()
+
+        # TODO: use .query() instead
+        out = pipeline.listen("v")
+
+        pipeline.start()
+
+        pipeline.input_json(
+            "test_events",
+            [{"id": "a", "a": "test4", "t": "2025-05-20 21:00:17.920"}],
+        )
+        pipeline.wait_for_idle()
+
+        output = out.to_dict()
+        assert output == [
+            {
+                "id": "a",
+                "a": "test4",
+                "t": Timestamp("2025-05-20 21:00:17.920"),
+                "insert_delete": 1,
+            }
+        ]
+
+        pipeline.input_json(
+            "test_events",
+            [{"id": "a", "a": "test5", "t": "2025-03-20 21:00:17.920"}],
+        )
+        pipeline.wait_for_idle()
+
+        output = out.to_dict()
+        assert output == []
+
+        pipeline.stop(force=True)
+
+
+if __name__ == "__main__":
+    unittest.main()
{feldera-0.128.0 → feldera-0.129.0}/tests/test_shared_pipeline.py

@@ -84,6 +84,17 @@ class TestPipeline(SharedTestPipeline):
         got = "\n".join(resp)
         assert got in expected

+    def test_adhoc_query_hash(self):
+        data = "1\n2\n"
+        self.pipeline.start()
+        TEST_CLIENT.push_to_pipeline(self.pipeline.name, "tbl", "csv", data)
+        resp = TEST_CLIENT.query_as_hash(
+            self.pipeline.name, "SELECT * FROM tbl ORDER BY id"
+        )
+        assert (
+            resp == "0B021466CA428474EF16F899D0F841D7338C168C063DA5DB43666D1AB3081558"
+        )
+
     def test_adhoc_query_parquet(self):
         data = "1\n2\n"
         self.pipeline.start()
feldera-0.128.0/tests/test_checkpoint_sync.py (deleted)

@@ -1,319 +0,0 @@
-from tests.shared_test_pipeline import SharedTestPipeline
-from tests import enterprise_only
-from feldera.runtime_config import RuntimeConfig, Storage
-from feldera.enums import PipelineStatus, FaultToleranceModel
-from typing import Optional
-import os
-import sys
-import time
-from uuid import uuid4
-import random
-
-
-DEFAULT_ENDPOINT = os.environ.get(
-    "DEFAULT_MINIO_ENDPOINT", "http://minio.extra.svc.cluster.local:9000"
-)
-DEFAULT_BUCKET = "default"
-ACCESS_KEY = "minio"
-SECRET_KEY = "miniopasswd"
-
-
-def storage_cfg(
-    pipeline_name: str,
-    endpoint: Optional[str] = None,
-    start_from_checkpoint: Optional[str] = None,
-    strict: bool = False,
-    auth_err: bool = False,
-    standby: bool = False,
-    pull_interval: int = 2,
-) -> dict:
-    return {
-        "backend": {
-            "name": "file",
-            "config": {
-                "sync": {
-                    "bucket": f"{DEFAULT_BUCKET}/{pipeline_name}",
-                    "access_key": ACCESS_KEY,
-                    "secret_key": SECRET_KEY if not auth_err else SECRET_KEY + "extra",
-                    "provider": "Minio",
-                    "endpoint": endpoint or DEFAULT_ENDPOINT,
-                    "start_from_checkpoint": start_from_checkpoint,
-                    "fail_if_no_checkpoint": strict,
-                    "standby": standby,
-                    "pull_interval": pull_interval,
-                }
-            },
-        }
-    }
-
-
-class TestCheckpointSync(SharedTestPipeline):
-    @enterprise_only
-    def test_checkpoint_sync(
-        self,
-        from_uuid: bool = False,
-        random_uuid: bool = False,
-        clear_storage: bool = True,
-        auth_err: bool = False,
-        strict: bool = False,
-        expect_empty: bool = False,
-        standby: bool = False,
-    ):
-        """
-        CREATE TABLE t0 (c0 INT, c1 VARCHAR);
-        CREATE MATERIALIZED VIEW v0 AS SELECT * FROM t0;
-        """
-
-        storage_config = storage_cfg(self.pipeline.name)
-        ft = FaultToleranceModel.AtLeastOnce
-
-        self.pipeline.set_runtime_config(
-            RuntimeConfig(
-                fault_tolerance_model=ft, storage=Storage(config=storage_config)
-            )
-        )
-        self.pipeline.start()
-
-        random.seed(time.time())
-        total = random.randint(10, 20)
-        data = [{"c0": i, "c1": str(i)} for i in range(1, total)]
-        self.pipeline.input_json("t0", data)
-        self.pipeline.execute("INSERT INTO t0 VALUES (21, 'exists')")
-
-        start = time.time()
-        timeout = 5
-
-        while True:
-            processed = self.pipeline.stats().global_metrics.total_processed_records
-            if processed == total:
-                break
-
-            if time.time() - start > timeout:
-                raise TimeoutError(
-                    f"timed out while waiting for pipeline to process {total} records"
-                )
-
-            time.sleep(0.1)
-
-        got_before = list(self.pipeline.query("SELECT * FROM v0"))
-        print(f"{self.pipeline.name}: records: {total}, {got_before}", file=sys.stderr)
-
-        if len(got_before) != processed:
-            raise RuntimeError(
-                f"adhoc query returned {len(got_before)} but {processed} records were processed: {got_before}"
-            )
-
-        self.pipeline.checkpoint(wait=True)
-        uuid = self.pipeline.sync_checkpoint(wait=True)
-
-        self.pipeline.stop(force=True)
-
-        if clear_storage:
-            self.pipeline.clear_storage()
-
-        if random_uuid:
-            uuid = uuid4()
-
-        # Restart pipeline from checkpoint
-        storage_config = storage_cfg(
-            pipeline_name=self.pipeline.name,
-            start_from_checkpoint=uuid if from_uuid else "latest",
-            auth_err=auth_err,
-            strict=strict,
-            standby=standby,
-        )
-        self.pipeline.set_runtime_config(
-            RuntimeConfig(
-                fault_tolerance_model=ft, storage=Storage(config=storage_config)
-            )
-        )
-
-        if not standby:
-            self.pipeline.start()
-        else:
-            self.pipeline.start(wait=False)
-
-        # wait for the pipeline to initialize
-        start = time.monotonic()
-        # wait for a maximum of 120 seconds for the pipeline to provison
-        end = start + 120
-
-        # wait for the pipeline to finish provisoning
-        for log in self.pipeline.logs():
-            if "checkpoint pulled successfully" in log:
-                break
-
-            if time.monotonic() > end:
-                raise TimeoutError(
-                    f"{self.pipeline.name} timedout waiting to pull checkpoint"
-                )
-
-        if standby:
-            # wait for 8 seconds, this should be more than enough time
-            time.sleep(8)
-            assert self.pipeline.status() == PipelineStatus.INITIALIZING
-
-            self.pipeline.activate(timeout_s=10)
-
-        got_after = list(self.pipeline.query("SELECT * FROM v0"))
-
-        print(
-            f"{self.pipeline.name}: after: {len(got_after)}, {got_after}",
-            file=sys.stderr,
-        )
-
-        if expect_empty:
-            got_before = []
-
-        self.assertCountEqual(got_before, got_after)
-
-        self.pipeline.stop(force=True)
-
-        if clear_storage:
-            self.pipeline.clear_storage()
-
-    @enterprise_only
-    def test_from_uuid(self):
-        self.test_checkpoint_sync(from_uuid=True)
-
-    @enterprise_only
-    def test_without_clearing_storage(self):
-        self.test_checkpoint_sync(clear_storage=False)
-
-    @enterprise_only
-    def test_autherr_fail(self):
-        with self.assertRaisesRegex(RuntimeError, "SignatureDoesNotMatch"):
-            self.test_checkpoint_sync(auth_err=True, strict=True)
-
-    @enterprise_only
-    def test_autherr(self):
-        self.test_checkpoint_sync(auth_err=True, strict=False, expect_empty=True)
-
-    @enterprise_only
-    def test_nonexistent_checkpoint_fail(self):
-        with self.assertRaisesRegex(RuntimeError, "were not found in source"):
-            self.test_checkpoint_sync(random_uuid=True, from_uuid=True, strict=True)
-
-    @enterprise_only
-    def test_nonexistent_checkpoint(self):
-        self.test_checkpoint_sync(random_uuid=True, from_uuid=True, expect_empty=True)
-
-    @enterprise_only
-    def test_standby_activation(self):
-        self.test_checkpoint_sync(standby=True)
-
-    @enterprise_only
-    def test_standby_activation_from_uuid(self):
-        self.test_checkpoint_sync(standby=True, from_uuid=True)
-
-    @enterprise_only
-    def test_standby_fallback(self, from_uuid: bool = False):
-        # Step 1: Start main pipeline
-        storage_config = storage_cfg(self.pipeline.name)
-        ft = FaultToleranceModel.AtLeastOnce
-        self.pipeline.set_runtime_config(
-            RuntimeConfig(
-                fault_tolerance_model=ft, storage=Storage(config=storage_config)
-            )
-        )
-        self.pipeline.start()
-
-        # Insert initial data
-        random.seed(time.time())
-        total_initial = random.randint(10, 20)
-        data_initial = [{"c0": i, "c1": str(i)} for i in range(1, total_initial)]
-        self.pipeline.input_json("t0", data_initial)
-        self.pipeline.execute("INSERT INTO t0 VALUES (21, 'exists')")
-        self.pipeline.wait_for_completion()
-
-        got_before = list(self.pipeline.query("select * from v0"))
-
-        # Step 2: Create checkpoint and sync
-        self.pipeline.checkpoint(wait=True)
-        uuid = self.pipeline.sync_checkpoint(wait=True)
-
-        # Step 3: Start standby pipeline
-        standby = self.new_pipeline_with_suffix("standby")
-        pull_interval = 1
-        standby.set_runtime_config(
-            RuntimeConfig(
-                fault_tolerance_model=ft,
-                storage=Storage(
-                    config=storage_cfg(
-                        self.pipeline.name,
-                        start_from_checkpoint=uuid if from_uuid else "latest",
-                        standby=True,
-                        pull_interval=pull_interval,
-                    )
-                ),
-            )
-        )
-        standby.start(wait=False)
-
-        # Wait until standby pulls the first checkpoint
-        start = time.monotonic()
-        end = start + 120
-        for log in standby.logs():
-            if "checkpoint pulled successfully" in log:
-                break
-            if time.monotonic() > end:
-                raise TimeoutError(
-                    "Timed out waiting for standby pipeline to pull checkpoint"
-                )
-
-        # Step 4: Add more data and make 3-10 checkpoints
-        extra_ckpts = random.randint(3, 10)
-        total_additional = 0
-
-        for i in range(extra_ckpts):
-            new_val = 100 + i
-            new_data = [{"c0": new_val, "c1": f"extra_{new_val}"}]
-            self.pipeline.input_json("t0", new_data)
-            self.pipeline.wait_for_completion()
-            total_additional += 1
-            self.pipeline.checkpoint(wait=True)
-            self.pipeline.sync_checkpoint(wait=True)
-            time.sleep(0.2)
-
-        got_expected = (
-            got_before if from_uuid else list(self.pipeline.query("SELECT * FROM v0"))
-        )
-        print(
-            f"{self.pipeline.name}: final records before shutdown: {got_expected}",
-            file=sys.stderr,
-        )
-
-        # Step 5: Stop main and activate standby
-        self.pipeline.stop(force=True)
-
-        assert standby.status() == PipelineStatus.INITIALIZING
-        standby.activate(timeout_s=(pull_interval * extra_ckpts) + 60)
-
-        for log in standby.logs():
-            if "activated" in log:
-                break
-            if time.monotonic() > end:
-                raise TimeoutError("Timed out waiting for standby pipeline to activate")
-
-        # Step 6: Validate standby has all expected records
-        got_after = list(standby.query("SELECT * FROM v0"))
-        print(
-            f"{standby.name}: final records after activation: {got_after}",
-            file=sys.stderr,
-        )
-        self.assertCountEqual(got_expected, got_after)
-
-        # Cleanup
-        standby.stop(force=True)
-
-        standby.start()
-        got_final = list(standby.query("SELECT * FROM v0"))
-        standby.stop(force=True)
-
-        self.assertCountEqual(got_after, got_final)
-
-        self.pipeline.clear_storage()
-
-    @enterprise_only
-    def test_standby_fallback_from_uuid(self):
-        self.test_standby_fallback(from_uuid=True)
The remaining files listed above (marked +0 -0) are unchanged between the two versions.