feldera 0.128.0.tar.gz → 0.129.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of feldera might be problematic.

Files changed (35):
  1. {feldera-0.128.0 → feldera-0.129.0}/PKG-INFO +18 -7
  2. {feldera-0.128.0 → feldera-0.129.0}/README.md +17 -6
  3. {feldera-0.128.0 → feldera-0.129.0}/feldera/pipeline.py +19 -0
  4. {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/_httprequests.py +15 -5
  5. {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/config.py +3 -0
  6. {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/feldera_client.py +31 -4
  7. {feldera-0.128.0 → feldera-0.129.0}/feldera/stats.py +3 -3
  8. {feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/PKG-INFO +18 -7
  9. {feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/SOURCES.txt +1 -0
  10. {feldera-0.128.0 → feldera-0.129.0}/pyproject.toml +1 -1
  11. feldera-0.129.0/tests/test_checkpoint_sync.py +320 -0
  12. feldera-0.129.0/tests/test_issue4457.py +57 -0
  13. {feldera-0.128.0 → feldera-0.129.0}/tests/test_shared_pipeline.py +11 -0
  14. feldera-0.128.0/tests/test_checkpoint_sync.py +0 -319
  15. {feldera-0.128.0 → feldera-0.129.0}/feldera/__init__.py +0 -0
  16. {feldera-0.128.0 → feldera-0.129.0}/feldera/_callback_runner.py +0 -0
  17. {feldera-0.128.0 → feldera-0.129.0}/feldera/_helpers.py +0 -0
  18. {feldera-0.128.0 → feldera-0.129.0}/feldera/enums.py +0 -0
  19. {feldera-0.128.0 → feldera-0.129.0}/feldera/output_handler.py +0 -0
  20. {feldera-0.128.0 → feldera-0.129.0}/feldera/pipeline_builder.py +0 -0
  21. {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/__init__.py +0 -0
  22. {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/_helpers.py +0 -0
  23. {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/errors.py +0 -0
  24. {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/feldera_config.py +0 -0
  25. {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/pipeline.py +0 -0
  26. {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/sql_table.py +0 -0
  27. {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/sql_view.py +0 -0
  28. {feldera-0.128.0 → feldera-0.129.0}/feldera/runtime_config.py +0 -0
  29. {feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/dependency_links.txt +0 -0
  30. {feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/requires.txt +0 -0
  31. {feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/top_level.txt +0 -0
  32. {feldera-0.128.0 → feldera-0.129.0}/setup.cfg +0 -0
  33. {feldera-0.128.0 → feldera-0.129.0}/tests/test_pipeline_builder.py +0 -0
  34. {feldera-0.128.0 → feldera-0.129.0}/tests/test_shared_pipeline_stress.py +0 -0
  35. {feldera-0.128.0 → feldera-0.129.0}/tests/test_udf.py +0 -0
{feldera-0.128.0 → feldera-0.129.0}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: feldera
- Version: 0.128.0
+ Version: 0.129.0
  Summary: The feldera python client
  Author-email: Feldera Team <dev@feldera.com>
  License: MIT
@@ -28,19 +28,19 @@ Feldera Python is the Feldera SDK for Python developers.
  ## Installation

  ```bash
- pip install feldera
+ uv pip install feldera
  ```

  ### Installing from Github

  ```bash
- pip install git+https://github.com/feldera/feldera#subdirectory=python
+ uv pip install git+https://github.com/feldera/feldera#subdirectory=python
  ```

  Similarly, to install from a specific branch:

  ```bash
- $ pip install git+https://github.com/feldera/feldera@{BRANCH_NAME}#subdirectory=python
+ uv pip install git+https://github.com/feldera/feldera@{BRANCH_NAME}#subdirectory=python
  ```

  Replace `{BRANCH_NAME}` with the name of the branch you want to install from.
@@ -51,7 +51,12 @@ If you have cloned the Feldera repo, you can install the python SDK as follows:

  ```bash
  # the Feldera Python SDK is present inside the python/ directory
- pip install python/
+ cd python
+ # If you don't have a virtual environment, create one
+ uv venv
+ source .venv/activate
+ # Install the SDK in editable mode
+ uv pip install .
  ```

  ## Documentation
@@ -61,7 +66,7 @@ The Python SDK documentation is available at

  To build the html documentation run:

- Ensure that you have sphinx installed. If not, install it using `pip install sphinx`.
+ Ensure that you have sphinx installed. If not, install it using `uv pip install sphinx`.

  Then run the following commands:

@@ -92,7 +97,13 @@ To run tests from a specific file:
  (cd python && python3 -m pytest ./tests/path-to-file.py)
  ```

- #### Running Tests
+ To run a specific test:
+
+ ```bash
+ uv run python -m pytest tests/test_shared_pipeline.py::TestPipeline::test_adhoc_query_hash -v
+ ```
+
+ #### Running All Tests

  The tests validate end-to-end correctness of SQL functionality. To
  run the tests use:
{feldera-0.128.0 → feldera-0.129.0}/README.md

@@ -5,19 +5,19 @@ Feldera Python is the Feldera SDK for Python developers.
  ## Installation

  ```bash
- pip install feldera
+ uv pip install feldera
  ```

  ### Installing from Github

  ```bash
- pip install git+https://github.com/feldera/feldera#subdirectory=python
+ uv pip install git+https://github.com/feldera/feldera#subdirectory=python
  ```

  Similarly, to install from a specific branch:

  ```bash
- $ pip install git+https://github.com/feldera/feldera@{BRANCH_NAME}#subdirectory=python
+ uv pip install git+https://github.com/feldera/feldera@{BRANCH_NAME}#subdirectory=python
  ```

  Replace `{BRANCH_NAME}` with the name of the branch you want to install from.
@@ -28,7 +28,12 @@ If you have cloned the Feldera repo, you can install the python SDK as follows:

  ```bash
  # the Feldera Python SDK is present inside the python/ directory
- pip install python/
+ cd python
+ # If you don't have a virtual environment, create one
+ uv venv
+ source .venv/activate
+ # Install the SDK in editable mode
+ uv pip install .
  ```

  ## Documentation
@@ -38,7 +43,7 @@ The Python SDK documentation is available at

  To build the html documentation run:

- Ensure that you have sphinx installed. If not, install it using `pip install sphinx`.
+ Ensure that you have sphinx installed. If not, install it using `uv pip install sphinx`.

  Then run the following commands:

@@ -69,7 +74,13 @@ To run tests from a specific file:
  (cd python && python3 -m pytest ./tests/path-to-file.py)
  ```

- #### Running Tests
+ To run a specific test:
+
+ ```bash
+ uv run python -m pytest tests/test_shared_pipeline.py::TestPipeline::test_adhoc_query_hash -v
+ ```
+
+ #### Running All Tests

  The tests validate end-to-end correctness of SQL functionality. To
  run the tests use:
{feldera-0.128.0 → feldera-0.129.0}/feldera/pipeline.py

@@ -809,6 +809,25 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""

          return self.client.query_as_text(self.name, query)

+     def query_hash(self, query: str):
+         """
+         Executes an ad-hoc SQL query on this pipeline and returns the result
+         as a hash of the result set. This is useful for quickly checking
+         if the result set has changed without retrieving the entire result.
+
+         Note:
+             For a stable hash, the query must be deterministic which means
+             it should be sorted.
+
+         :param query: The SQL query to be executed.
+
+         :raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
+             state.
+         :raises FelderaAPIError: If querying a non materialized table or view.
+         :raises FelderaAPIError: If the query is invalid.
+         """
+         return self.client.query_as_hash(self.name, query)
+
      def execute(self, query: str):
          """
          Executes an ad-hoc SQL query on the current pipeline, discarding its
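
A minimal usage sketch of the new `query_hash` method (the client URL, pipeline name, and SQL below are hypothetical, and assume a running local Feldera instance):

```python
from feldera import FelderaClient, PipelineBuilder

# Hypothetical local deployment; adjust the URL and SQL to your setup.
client = FelderaClient("http://localhost:8080")
sql = "CREATE TABLE t (id INT); CREATE MATERIALIZED VIEW v AS SELECT * FROM t;"
pipeline = PipelineBuilder(client, name="hash_demo", sql=sql).create_or_replace()
pipeline.start()

# The ORDER BY makes the result set deterministic, so the digest is
# stable across calls and two runs can be compared without fetching
# the full result set.
h1 = pipeline.query_hash("SELECT * FROM v ORDER BY id")
h2 = pipeline.query_hash("SELECT * FROM v ORDER BY id")
assert h1 == h2
pipeline.stop(force=True)
```

Because the server computes the hash, only a fixed-size digest crosses the wire, which is why the docstring insists on a deterministic (sorted) query.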
{feldera-0.128.0 → feldera-0.129.0}/feldera/rest/_httprequests.py

@@ -57,6 +57,7 @@ class HttpRequests:
          self.headers["Content-Type"] = content_type

          try:
+             conn_timeout = self.config.connection_timeout
              timeout = self.config.timeout
              headers = self.headers

@@ -74,7 +75,7 @@
              if http_method.__name__ == "get":
                  request = http_method(
                      request_path,
-                     timeout=timeout,
+                     timeout=(conn_timeout, timeout),
                      headers=headers,
                      params=params,
                      stream=stream,
@@ -83,7 +84,7 @@
              elif isinstance(body, bytes):
                  request = http_method(
                      request_path,
-                     timeout=timeout,
+                     timeout=(conn_timeout, timeout),
                      headers=headers,
                      data=body,
                      params=params,
@@ -93,7 +94,7 @@
              else:
                  request = http_method(
                      request_path,
-                     timeout=timeout,
+                     timeout=(conn_timeout, timeout),
                      headers=headers,
                      data=json_serialize(body) if serialize else body,
                      params=params,
@@ -118,9 +119,18 @@
                      time.sleep(2)  # backoff, adjust as needed
                      continue
                  raise  # re-raise for all other errors or if out of retries
+         except requests.exceptions.Timeout as err:
+             if attempt < max_retries:
+                 logging.warning(
+                     "HTTP Connection Timeout for %s, retrying (%d/%d)...",
+                     path,
+                     attempt + 1,
+                     max_retries,
+                 )
+                 time.sleep(2)
+                 continue
+             raise FelderaTimeoutError(str(err)) from err

-         except requests.exceptions.Timeout as err:
-             raise FelderaTimeoutError(str(err)) from err
          except requests.exceptions.ConnectionError as err:
              raise FelderaCommunicationError(str(err)) from err

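For context on the `timeout=(conn_timeout, timeout)` change: when the `requests` library is given a tuple, it applies the first element to connection establishment and the second to reads, rather than treating one value as an overall deadline. A standalone sketch (the URL is hypothetical):

```python
import requests

# A (connect, read) tuple bounds the two phases separately:
# up to 3 s to establish the TCP connection, then up to 60 s
# between bytes of the response body.
resp = requests.get("http://localhost:8080/v0/pipelines", timeout=(3.0, 60.0))
resp.raise_for_status()
print(resp.status_code)
```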
{feldera-0.128.0 → feldera-0.129.0}/feldera/rest/config.py

@@ -12,6 +12,7 @@ class Config:
          api_key: Optional[str] = None,
          version: Optional[str] = None,
          timeout: Optional[float] = None,
+         connection_timeout: Optional[float] = None,
          requests_verify: bool = True,
      ) -> None:
          """
@@ -19,6 +20,7 @@
          :param api_key: The optional API key to access Feldera
          :param version: The version of the API to use
          :param timeout: The timeout for the HTTP requests
+         :param connection_timeout: The connection timeout for the HTTP requests
          :param requests_verify: The `verify` parameter passed to the requests
              library. `True` by default.
          """
@@ -27,4 +29,5 @@
          self.api_key: Optional[str] = api_key
          self.version: Optional[str] = version or "v0"
          self.timeout: Optional[float] = timeout
+         self.connection_timeout: Optional[float] = connection_timeout
          self.requests_verify: bool = requests_verify
{feldera-0.128.0 → feldera-0.129.0}/feldera/rest/feldera_client.py

@@ -46,20 +46,26 @@ class FelderaClient:
          url: str,
          api_key: Optional[str] = None,
          timeout: Optional[float] = None,
+         connection_timeout: Optional[float] = None,
          requests_verify: bool = True,
      ) -> None:
          """
          :param url: The url to Feldera API (ex: https://try.feldera.com)
          :param api_key: The optional API key for Feldera
-         :param timeout: (optional) The amount of time in seconds that the client
-             will wait for a response before timing
-             out.
+         :param timeout: (optional) The amount of time in seconds that the
+             client will wait for a response before timing out.
+         :param connection_timeout: (optional) The amount of time in seconds that
+             the client will wait to establish connection before timing out.
          :param requests_verify: The `verify` parameter passed to the requests
              library. `True` by default.
          """

          self.config = Config(
-             url, api_key, timeout=timeout, requests_verify=requests_verify
+             url,
+             api_key,
+             timeout=timeout,
+             connection_timeout=connection_timeout,
+             requests_verify=requests_verify,
          )
          self.http = HttpRequests(self.config)

@@ -895,6 +901,27 @@ Reason: The pipeline is in a STOPPED state due to the following error:
              if chunk:
                  yield chunk.decode("utf-8")

+     def query_as_hash(self, pipeline_name: str, query: str) -> str:
+         """
+         Executes an ad-hoc query on the specified pipeline and returns a hash of the result.
+
+         :param pipeline_name: The name of the pipeline to query.
+         :param query: The SQL query to be executed.
+         :return: A string containing the hash of the query result.
+         """
+         params = {
+             "pipeline_name": pipeline_name,
+             "sql": query,
+             "format": "hash",
+         }
+
+         resp = self.http.get(
+             path=f"/pipelines/{pipeline_name}/query",
+             params=params,
+             stream=False,
+         )
+         return resp
+
      def query_as_parquet(self, pipeline_name: str, query: str, path: str):
          """
          Executes an ad-hoc query on the specified pipeline and saves the result to a parquet file.
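
Seen from the caller's side, the client-level additions in this release compose as follows; a sketch assuming a hypothetical local instance with a running pipeline named `demo` that exposes a materialized view `v`:

```python
from feldera import FelderaClient

# connection_timeout bounds connection establishment separately from
# the response timeout, per the new Config field above.
client = FelderaClient(
    "http://localhost:8080",
    timeout=300.0,
    connection_timeout=5.0,
)

# Server-side digest of a deterministic ad-hoc query result.
digest = client.query_as_hash("demo", "SELECT * FROM v ORDER BY id")
print(digest)
```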
{feldera-0.128.0 → feldera-0.129.0}/feldera/stats.py

@@ -26,8 +26,8 @@ class PipelineStatistics:
          pipeline.inputs = [
              InputEndpointStatus.from_dict(input) for input in d["inputs"]
          ]
-         pipeline.inputs = [
-             OutputEndpointStatus().from_dict(output) for output in d["outputs"]
+         pipeline.outputs = [
+             OutputEndpointStatus.from_dict(output) for output in d["outputs"]
          ]
          return pipeline

@@ -46,7 +46,7 @@ class GlobalPipelineMetrics:
          self.rss_bytes: Optional[int] = None
          self.cpu_msecs: Optional[int] = None
          self.start_time: Optional[datetime] = None
-         self.incarnation_uuid: Optional[uuid] = None
+         self.incarnation_uuid: Optional[uuid.UUID] = None
          self.storage_bytes: Optional[int] = None
          self.storage_mb_secs: Optional[int] = None
          self.runtime_elapsed_msecs: Optional[int] = None
{feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: feldera
- Version: 0.128.0
+ Version: 0.129.0
  Summary: The feldera python client
  Author-email: Feldera Team <dev@feldera.com>
  License: MIT
@@ -28,19 +28,19 @@ Feldera Python is the Feldera SDK for Python developers.
  ## Installation

  ```bash
- pip install feldera
+ uv pip install feldera
  ```

  ### Installing from Github

  ```bash
- pip install git+https://github.com/feldera/feldera#subdirectory=python
+ uv pip install git+https://github.com/feldera/feldera#subdirectory=python
  ```

  Similarly, to install from a specific branch:

  ```bash
- $ pip install git+https://github.com/feldera/feldera@{BRANCH_NAME}#subdirectory=python
+ uv pip install git+https://github.com/feldera/feldera@{BRANCH_NAME}#subdirectory=python
  ```

  Replace `{BRANCH_NAME}` with the name of the branch you want to install from.
@@ -51,7 +51,12 @@ If you have cloned the Feldera repo, you can install the python SDK as follows:

  ```bash
  # the Feldera Python SDK is present inside the python/ directory
- pip install python/
+ cd python
+ # If you don't have a virtual environment, create one
+ uv venv
+ source .venv/activate
+ # Install the SDK in editable mode
+ uv pip install .
  ```

  ## Documentation
@@ -61,7 +66,7 @@ The Python SDK documentation is available at

  To build the html documentation run:

- Ensure that you have sphinx installed. If not, install it using `pip install sphinx`.
+ Ensure that you have sphinx installed. If not, install it using `uv pip install sphinx`.

  Then run the following commands:

@@ -92,7 +97,13 @@ To run tests from a specific file:
  (cd python && python3 -m pytest ./tests/path-to-file.py)
  ```

- #### Running Tests
+ To run a specific test:
+
+ ```bash
+ uv run python -m pytest tests/test_shared_pipeline.py::TestPipeline::test_adhoc_query_hash -v
+ ```
+
+ #### Running All Tests

  The tests validate end-to-end correctness of SQL functionality. To
  run the tests use:
{feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/SOURCES.txt

@@ -25,6 +25,7 @@ feldera/rest/pipeline.py
  feldera/rest/sql_table.py
  feldera/rest/sql_view.py
  tests/test_checkpoint_sync.py
+ tests/test_issue4457.py
  tests/test_pipeline_builder.py
  tests/test_shared_pipeline.py
  tests/test_shared_pipeline_stress.py
{feldera-0.128.0 → feldera-0.129.0}/pyproject.toml

@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
  name = "feldera"
  readme = "README.md"
  description = "The feldera python client"
- version = "0.128.0"
+ version = "0.129.0"
  license = { text = "MIT" }
  requires-python = ">=3.10"
  authors = [
feldera-0.129.0/tests/test_checkpoint_sync.py

@@ -0,0 +1,320 @@
+ # from tests.shared_test_pipeline import SharedTestPipeline
+ # from tests import enterprise_only
+ # from feldera.runtime_config import RuntimeConfig, Storage
+ # from feldera.enums import PipelineStatus, FaultToleranceModel
+ # from typing import Optional
+ # import os
+ # import sys
+ # import time
+ # from uuid import uuid4
+ # import random
+ # import pytest
+ #
+ #
+ # DEFAULT_ENDPOINT = os.environ.get(
+ #     "DEFAULT_MINIO_ENDPOINT", "http://minio.extra.svc.cluster.local:9000"
+ # )
+ # DEFAULT_BUCKET = "default"
+ # ACCESS_KEY = "minio"
+ # SECRET_KEY = "miniopasswd"
+ #
+ #
+ # def storage_cfg(
+ #     pipeline_name: str,
+ #     endpoint: Optional[str] = None,
+ #     start_from_checkpoint: Optional[str] = None,
+ #     strict: bool = False,
+ #     auth_err: bool = False,
+ #     standby: bool = False,
+ #     pull_interval: int = 2,
+ # ) -> dict:
+ #     return {
+ #         "backend": {
+ #             "name": "file",
+ #             "config": {
+ #                 "sync": {
+ #                     "bucket": f"{DEFAULT_BUCKET}/{pipeline_name}",
+ #                     "access_key": ACCESS_KEY,
+ #                     "secret_key": SECRET_KEY if not auth_err else SECRET_KEY + "extra",
+ #                     "provider": "Minio",
+ #                     "endpoint": endpoint or DEFAULT_ENDPOINT,
+ #                     "start_from_checkpoint": start_from_checkpoint,
+ #                     "fail_if_no_checkpoint": strict,
+ #                     "standby": standby,
+ #                     "pull_interval": pull_interval,
+ #                 }
+ #             },
+ #         }
+ #     }
+ #
+ #
+ # class TestCheckpointSync(SharedTestPipeline):
+ #     @enterprise_only
+ #     def test_checkpoint_sync(
+ #         self,
+ #         from_uuid: bool = False,
+ #         random_uuid: bool = False,
+ #         clear_storage: bool = True,
+ #         auth_err: bool = False,
+ #         strict: bool = False,
+ #         expect_empty: bool = False,
+ #         standby: bool = False,
+ #     ):
+ #         """
+ #         CREATE TABLE t0 (c0 INT, c1 VARCHAR);
+ #         CREATE MATERIALIZED VIEW v0 AS SELECT * FROM t0;
+ #         """
+ #
+ #         storage_config = storage_cfg(self.pipeline.name)
+ #         ft = FaultToleranceModel.AtLeastOnce
+ #
+ #         self.pipeline.set_runtime_config(
+ #             RuntimeConfig(
+ #                 fault_tolerance_model=ft, storage=Storage(config=storage_config)
+ #             )
+ #         )
+ #         self.pipeline.start()
+ #
+ #         random.seed(time.time())
+ #         total = random.randint(10, 20)
+ #         data = [{"c0": i, "c1": str(i)} for i in range(1, total)]
+ #         self.pipeline.input_json("t0", data)
+ #         self.pipeline.execute("INSERT INTO t0 VALUES (21, 'exists')")
+ #
+ #         start = time.time()
+ #         timeout = 5
+ #
+ #         while True:
+ #             processed = self.pipeline.stats().global_metrics.total_processed_records
+ #             if processed == total:
+ #                 break
+ #
+ #             if time.time() - start > timeout:
+ #                 raise TimeoutError(
+ #                     f"timed out while waiting for pipeline to process {total} records"
+ #                 )
+ #
+ #             time.sleep(0.1)
+ #
+ #         got_before = list(self.pipeline.query("SELECT * FROM v0"))
+ #         print(f"{self.pipeline.name}: records: {total}, {got_before}", file=sys.stderr)
+ #
+ #         if len(got_before) != processed:
+ #             raise RuntimeError(
+ #                 f"adhoc query returned {len(got_before)} but {processed} records were processed: {got_before}"
+ #             )
+ #
+ #         self.pipeline.checkpoint(wait=True)
+ #         uuid = self.pipeline.sync_checkpoint(wait=True)
+ #
+ #         self.pipeline.stop(force=True)
+ #
+ #         if clear_storage:
+ #             self.pipeline.clear_storage()
+ #
+ #         if random_uuid:
+ #             uuid = uuid4()
+ #
+ #         # Restart pipeline from checkpoint
+ #         storage_config = storage_cfg(
+ #             pipeline_name=self.pipeline.name,
+ #             start_from_checkpoint=uuid if from_uuid else "latest",
+ #             auth_err=auth_err,
+ #             strict=strict,
+ #             standby=standby,
+ #         )
+ #         self.pipeline.set_runtime_config(
+ #             RuntimeConfig(
+ #                 fault_tolerance_model=ft, storage=Storage(config=storage_config)
+ #             )
+ #         )
+ #
+ #         if not standby:
+ #             self.pipeline.start()
+ #         else:
+ #             self.pipeline.start(wait=False)
+ #
+ #         # wait for the pipeline to initialize
+ #         start = time.monotonic()
+ #         # wait for a maximum of 120 seconds for the pipeline to provison
+ #         end = start + 120
+ #
+ #         # wait for the pipeline to finish provisoning
+ #         for log in self.pipeline.logs():
+ #             if "checkpoint pulled successfully" in log:
+ #                 break
+ #
+ #             if time.monotonic() > end:
+ #                 raise TimeoutError(
+ #                     f"{self.pipeline.name} timedout waiting to pull checkpoint"
+ #                 )
+ #
+ #         if standby:
+ #             # wait for 8 seconds, this should be more than enough time
+ #             time.sleep(8)
+ #             assert self.pipeline.status() == PipelineStatus.INITIALIZING
+ #
+ #             self.pipeline.activate(timeout_s=10)
+ #
+ #         got_after = list(self.pipeline.query("SELECT * FROM v0"))
+ #
+ #         print(
+ #             f"{self.pipeline.name}: after: {len(got_after)}, {got_after}",
+ #             file=sys.stderr,
+ #         )
+ #
+ #         if expect_empty:
+ #             got_before = []
+ #
+ #         self.assertCountEqual(got_before, got_after)
+ #
+ #         self.pipeline.stop(force=True)
+ #
+ #         if clear_storage:
+ #             self.pipeline.clear_storage()
+ #
+ #     @enterprise_only
+ #     def test_from_uuid(self):
+ #         self.test_checkpoint_sync(from_uuid=True)
+ #
+ #     @enterprise_only
+ #     def test_without_clearing_storage(self):
+ #         self.test_checkpoint_sync(clear_storage=False)
+ #
+ #     @enterprise_only
+ #     def test_autherr_fail(self):
+ #         with self.assertRaisesRegex(RuntimeError, "SignatureDoesNotMatch"):
+ #             self.test_checkpoint_sync(auth_err=True, strict=True)
+ #
+ #     @enterprise_only
+ #     def test_autherr(self):
+ #         self.test_checkpoint_sync(auth_err=True, strict=False, expect_empty=True)
+ #
+ #     @enterprise_only
+ #     def test_nonexistent_checkpoint_fail(self):
+ #         with self.assertRaisesRegex(RuntimeError, "were not found in source"):
+ #             self.test_checkpoint_sync(random_uuid=True, from_uuid=True, strict=True)
+ #
+ #     @enterprise_only
+ #     def test_nonexistent_checkpoint(self):
+ #         self.test_checkpoint_sync(random_uuid=True, from_uuid=True, expect_empty=True)
+ #
+ #     @enterprise_only
+ #     def test_standby_activation(self):
+ #         self.test_checkpoint_sync(standby=True)
+ #
+ #     @enterprise_only
+ #     def test_standby_activation_from_uuid(self):
+ #         self.test_checkpoint_sync(standby=True, from_uuid=True)
+ #
+ #     @enterprise_only
+ #     def test_standby_fallback(self, from_uuid: bool = False):
+ #         # Step 1: Start main pipeline
+ #         storage_config = storage_cfg(self.pipeline.name)
+ #         ft = FaultToleranceModel.AtLeastOnce
+ #         self.pipeline.set_runtime_config(
+ #             RuntimeConfig(
+ #                 fault_tolerance_model=ft, storage=Storage(config=storage_config)
+ #             )
+ #         )
+ #         self.pipeline.start()
+ #
+ #         # Insert initial data
+ #         random.seed(time.time())
+ #         total_initial = random.randint(10, 20)
+ #         data_initial = [{"c0": i, "c1": str(i)} for i in range(1, total_initial)]
+ #         self.pipeline.input_json("t0", data_initial)
+ #         self.pipeline.execute("INSERT INTO t0 VALUES (21, 'exists')")
+ #         self.pipeline.wait_for_completion()
+ #
+ #         got_before = list(self.pipeline.query("select * from v0"))
+ #
+ #         # Step 2: Create checkpoint and sync
+ #         self.pipeline.checkpoint(wait=True)
+ #         uuid = self.pipeline.sync_checkpoint(wait=True)
+ #
+ #         # Step 3: Start standby pipeline
+ #         standby = self.new_pipeline_with_suffix("standby")
+ #         pull_interval = 1
+ #         standby.set_runtime_config(
+ #             RuntimeConfig(
+ #                 fault_tolerance_model=ft,
+ #                 storage=Storage(
+ #                     config=storage_cfg(
+ #                         self.pipeline.name,
+ #                         start_from_checkpoint=uuid if from_uuid else "latest",
+ #                         standby=True,
+ #                         pull_interval=pull_interval,
+ #                     )
+ #                 ),
+ #             )
+ #         )
+ #         standby.start(wait=False)
+ #
+ #         # Wait until standby pulls the first checkpoint
+ #         start = time.monotonic()
+ #         end = start + 120
+ #         for log in standby.logs():
+ #             if "checkpoint pulled successfully" in log:
+ #                 break
+ #             if time.monotonic() > end:
+ #                 raise TimeoutError(
+ #                     "Timed out waiting for standby pipeline to pull checkpoint"
+ #                 )
+ #
+ #         # Step 4: Add more data and make 3-10 checkpoints
+ #         extra_ckpts = random.randint(3, 10)
+ #         total_additional = 0
+ #
+ #         for i in range(extra_ckpts):
+ #             new_val = 100 + i
+ #             new_data = [{"c0": new_val, "c1": f"extra_{new_val}"}]
+ #             self.pipeline.input_json("t0", new_data)
+ #             self.pipeline.wait_for_completion()
+ #             total_additional += 1
+ #             self.pipeline.checkpoint(wait=True)
+ #             self.pipeline.sync_checkpoint(wait=True)
+ #             time.sleep(0.2)
+ #
+ #         got_expected = (
+ #             got_before if from_uuid else list(self.pipeline.query("SELECT * FROM v0"))
+ #         )
+ #         print(
+ #             f"{self.pipeline.name}: final records before shutdown: {got_expected}",
+ #             file=sys.stderr,
+ #         )
+ #
+ #         # Step 5: Stop main and activate standby
+ #         self.pipeline.stop(force=True)
+ #
+ #         assert standby.status() == PipelineStatus.INITIALIZING
+ #         standby.activate(timeout_s=(pull_interval * extra_ckpts) + 60)
+ #
+ #         for log in standby.logs():
+ #             if "activated" in log:
+ #                 break
+ #             if time.monotonic() > end:
+ #                 raise TimeoutError("Timed out waiting for standby pipeline to activate")
+ #
+ #         # Step 6: Validate standby has all expected records
+ #         got_after = list(standby.query("SELECT * FROM v0"))
+ #         print(
+ #             f"{standby.name}: final records after activation: {got_after}",
+ #             file=sys.stderr,
+ #         )
+ #         self.assertCountEqual(got_expected, got_after)
+ #
+ #         # Cleanup
+ #         standby.stop(force=True)
+ #
+ #         standby.start()
+ #         got_final = list(standby.query("SELECT * FROM v0"))
+ #         standby.stop(force=True)
+ #
+ #         self.assertCountEqual(got_after, got_final)
+ #
+ #         self.pipeline.clear_storage()
+ #
+ #     @enterprise_only
+ #     def test_standby_fallback_from_uuid(self):
+ #         self.test_standby_fallback(from_uuid=True)
feldera-0.129.0/tests/test_issue4457.py

@@ -0,0 +1,57 @@
+ import unittest
+
+ from pandas import Timestamp
+ from feldera import PipelineBuilder
+ from tests import TEST_CLIENT
+
+
+ class TestIssue_4457(unittest.TestCase):
+     def test_local(self):
+         sql = """
+         CREATE TABLE test_events (
+             id VARCHAR NOT NULL PRIMARY KEY,
+             a VARCHAR,
+             t TIMESTAMP NOT NULL LATENESS INTERVAL 1 MINUTE
+         );
+         CREATE VIEW V AS SELECT * FROM test_events;
+         """
+
+         pipeline = PipelineBuilder(
+             TEST_CLIENT, name="test_issue4457", sql=sql
+         ).create_or_replace()
+
+         # TODO: use .query() instead
+         out = pipeline.listen("v")
+
+         pipeline.start()
+
+         pipeline.input_json(
+             "test_events",
+             [{"id": "a", "a": "test4", "t": "2025-05-20 21:00:17.920"}],
+         )
+         pipeline.wait_for_idle()
+
+         output = out.to_dict()
+         assert output == [
+             {
+                 "id": "a",
+                 "a": "test4",
+                 "t": Timestamp("2025-05-20 21:00:17.920"),
+                 "insert_delete": 1,
+             }
+         ]
+
+         pipeline.input_json(
+             "test_events",
+             [{"id": "a", "a": "test5", "t": "2025-03-20 21:00:17.920"}],
+         )
+         pipeline.wait_for_idle()
+
+         output = out.to_dict()
+         assert output == []
+
+         pipeline.stop(force=True)
+
+
+ if __name__ == "__main__":
+     unittest.main()
{feldera-0.128.0 → feldera-0.129.0}/tests/test_shared_pipeline.py

@@ -84,6 +84,17 @@ class TestPipeline(SharedTestPipeline):
          got = "\n".join(resp)
          assert got in expected

+     def test_adhoc_query_hash(self):
+         data = "1\n2\n"
+         self.pipeline.start()
+         TEST_CLIENT.push_to_pipeline(self.pipeline.name, "tbl", "csv", data)
+         resp = TEST_CLIENT.query_as_hash(
+             self.pipeline.name, "SELECT * FROM tbl ORDER BY id"
+         )
+         assert (
+             resp == "0B021466CA428474EF16F899D0F841D7338C168C063DA5DB43666D1AB3081558"
+         )
+
      def test_adhoc_query_parquet(self):
          data = "1\n2\n"
          self.pipeline.start()
feldera-0.128.0/tests/test_checkpoint_sync.py

@@ -1,319 +0,0 @@
- from tests.shared_test_pipeline import SharedTestPipeline
- from tests import enterprise_only
- from feldera.runtime_config import RuntimeConfig, Storage
- from feldera.enums import PipelineStatus, FaultToleranceModel
- from typing import Optional
- import os
- import sys
- import time
- from uuid import uuid4
- import random
-
-
- DEFAULT_ENDPOINT = os.environ.get(
-     "DEFAULT_MINIO_ENDPOINT", "http://minio.extra.svc.cluster.local:9000"
- )
- DEFAULT_BUCKET = "default"
- ACCESS_KEY = "minio"
- SECRET_KEY = "miniopasswd"
-
-
- def storage_cfg(
-     pipeline_name: str,
-     endpoint: Optional[str] = None,
-     start_from_checkpoint: Optional[str] = None,
-     strict: bool = False,
-     auth_err: bool = False,
-     standby: bool = False,
-     pull_interval: int = 2,
- ) -> dict:
-     return {
-         "backend": {
-             "name": "file",
-             "config": {
-                 "sync": {
-                     "bucket": f"{DEFAULT_BUCKET}/{pipeline_name}",
-                     "access_key": ACCESS_KEY,
-                     "secret_key": SECRET_KEY if not auth_err else SECRET_KEY + "extra",
-                     "provider": "Minio",
-                     "endpoint": endpoint or DEFAULT_ENDPOINT,
-                     "start_from_checkpoint": start_from_checkpoint,
-                     "fail_if_no_checkpoint": strict,
-                     "standby": standby,
-                     "pull_interval": pull_interval,
-                 }
-             },
-         }
-     }
-
-
- class TestCheckpointSync(SharedTestPipeline):
-     @enterprise_only
-     def test_checkpoint_sync(
-         self,
-         from_uuid: bool = False,
-         random_uuid: bool = False,
-         clear_storage: bool = True,
-         auth_err: bool = False,
-         strict: bool = False,
-         expect_empty: bool = False,
-         standby: bool = False,
-     ):
-         """
-         CREATE TABLE t0 (c0 INT, c1 VARCHAR);
-         CREATE MATERIALIZED VIEW v0 AS SELECT * FROM t0;
-         """
-
-         storage_config = storage_cfg(self.pipeline.name)
-         ft = FaultToleranceModel.AtLeastOnce
-
-         self.pipeline.set_runtime_config(
-             RuntimeConfig(
-                 fault_tolerance_model=ft, storage=Storage(config=storage_config)
-             )
-         )
-         self.pipeline.start()
-
-         random.seed(time.time())
-         total = random.randint(10, 20)
-         data = [{"c0": i, "c1": str(i)} for i in range(1, total)]
-         self.pipeline.input_json("t0", data)
-         self.pipeline.execute("INSERT INTO t0 VALUES (21, 'exists')")
-
-         start = time.time()
-         timeout = 5
-
-         while True:
-             processed = self.pipeline.stats().global_metrics.total_processed_records
-             if processed == total:
-                 break
-
-             if time.time() - start > timeout:
-                 raise TimeoutError(
-                     f"timed out while waiting for pipeline to process {total} records"
-                 )
-
-             time.sleep(0.1)
-
-         got_before = list(self.pipeline.query("SELECT * FROM v0"))
-         print(f"{self.pipeline.name}: records: {total}, {got_before}", file=sys.stderr)
-
-         if len(got_before) != processed:
-             raise RuntimeError(
-                 f"adhoc query returned {len(got_before)} but {processed} records were processed: {got_before}"
-             )
-
-         self.pipeline.checkpoint(wait=True)
-         uuid = self.pipeline.sync_checkpoint(wait=True)
-
-         self.pipeline.stop(force=True)
-
-         if clear_storage:
-             self.pipeline.clear_storage()
-
-         if random_uuid:
-             uuid = uuid4()
-
-         # Restart pipeline from checkpoint
-         storage_config = storage_cfg(
-             pipeline_name=self.pipeline.name,
-             start_from_checkpoint=uuid if from_uuid else "latest",
-             auth_err=auth_err,
-             strict=strict,
-             standby=standby,
-         )
-         self.pipeline.set_runtime_config(
-             RuntimeConfig(
-                 fault_tolerance_model=ft, storage=Storage(config=storage_config)
-             )
-         )
-
-         if not standby:
-             self.pipeline.start()
-         else:
-             self.pipeline.start(wait=False)
-
-         # wait for the pipeline to initialize
-         start = time.monotonic()
-         # wait for a maximum of 120 seconds for the pipeline to provison
-         end = start + 120
-
-         # wait for the pipeline to finish provisoning
-         for log in self.pipeline.logs():
-             if "checkpoint pulled successfully" in log:
-                 break
-
-             if time.monotonic() > end:
-                 raise TimeoutError(
-                     f"{self.pipeline.name} timedout waiting to pull checkpoint"
-                 )
-
-         if standby:
-             # wait for 8 seconds, this should be more than enough time
-             time.sleep(8)
-             assert self.pipeline.status() == PipelineStatus.INITIALIZING
-
-             self.pipeline.activate(timeout_s=10)
-
-         got_after = list(self.pipeline.query("SELECT * FROM v0"))
-
-         print(
-             f"{self.pipeline.name}: after: {len(got_after)}, {got_after}",
-             file=sys.stderr,
-         )
-
-         if expect_empty:
-             got_before = []
-
-         self.assertCountEqual(got_before, got_after)
-
-         self.pipeline.stop(force=True)
-
-         if clear_storage:
-             self.pipeline.clear_storage()
-
-     @enterprise_only
-     def test_from_uuid(self):
-         self.test_checkpoint_sync(from_uuid=True)
-
-     @enterprise_only
-     def test_without_clearing_storage(self):
-         self.test_checkpoint_sync(clear_storage=False)
-
-     @enterprise_only
-     def test_autherr_fail(self):
-         with self.assertRaisesRegex(RuntimeError, "SignatureDoesNotMatch"):
-             self.test_checkpoint_sync(auth_err=True, strict=True)
-
-     @enterprise_only
-     def test_autherr(self):
-         self.test_checkpoint_sync(auth_err=True, strict=False, expect_empty=True)
-
-     @enterprise_only
-     def test_nonexistent_checkpoint_fail(self):
-         with self.assertRaisesRegex(RuntimeError, "were not found in source"):
-             self.test_checkpoint_sync(random_uuid=True, from_uuid=True, strict=True)
-
-     @enterprise_only
-     def test_nonexistent_checkpoint(self):
-         self.test_checkpoint_sync(random_uuid=True, from_uuid=True, expect_empty=True)
-
-     @enterprise_only
-     def test_standby_activation(self):
-         self.test_checkpoint_sync(standby=True)
-
-     @enterprise_only
-     def test_standby_activation_from_uuid(self):
-         self.test_checkpoint_sync(standby=True, from_uuid=True)
-
-     @enterprise_only
-     def test_standby_fallback(self, from_uuid: bool = False):
-         # Step 1: Start main pipeline
-         storage_config = storage_cfg(self.pipeline.name)
-         ft = FaultToleranceModel.AtLeastOnce
-         self.pipeline.set_runtime_config(
-             RuntimeConfig(
-                 fault_tolerance_model=ft, storage=Storage(config=storage_config)
-             )
-         )
-         self.pipeline.start()
-
-         # Insert initial data
-         random.seed(time.time())
-         total_initial = random.randint(10, 20)
-         data_initial = [{"c0": i, "c1": str(i)} for i in range(1, total_initial)]
-         self.pipeline.input_json("t0", data_initial)
-         self.pipeline.execute("INSERT INTO t0 VALUES (21, 'exists')")
-         self.pipeline.wait_for_completion()
-
-         got_before = list(self.pipeline.query("select * from v0"))
-
-         # Step 2: Create checkpoint and sync
-         self.pipeline.checkpoint(wait=True)
-         uuid = self.pipeline.sync_checkpoint(wait=True)
-
-         # Step 3: Start standby pipeline
-         standby = self.new_pipeline_with_suffix("standby")
-         pull_interval = 1
-         standby.set_runtime_config(
-             RuntimeConfig(
-                 fault_tolerance_model=ft,
-                 storage=Storage(
-                     config=storage_cfg(
-                         self.pipeline.name,
-                         start_from_checkpoint=uuid if from_uuid else "latest",
-                         standby=True,
-                         pull_interval=pull_interval,
-                     )
-                 ),
-             )
-         )
-         standby.start(wait=False)
-
-         # Wait until standby pulls the first checkpoint
-         start = time.monotonic()
-         end = start + 120
-         for log in standby.logs():
-             if "checkpoint pulled successfully" in log:
-                 break
-             if time.monotonic() > end:
-                 raise TimeoutError(
-                     "Timed out waiting for standby pipeline to pull checkpoint"
-                 )
-
-         # Step 4: Add more data and make 3-10 checkpoints
-         extra_ckpts = random.randint(3, 10)
-         total_additional = 0
-
-         for i in range(extra_ckpts):
-             new_val = 100 + i
-             new_data = [{"c0": new_val, "c1": f"extra_{new_val}"}]
-             self.pipeline.input_json("t0", new_data)
-             self.pipeline.wait_for_completion()
-             total_additional += 1
-             self.pipeline.checkpoint(wait=True)
-             self.pipeline.sync_checkpoint(wait=True)
-             time.sleep(0.2)
-
-         got_expected = (
-             got_before if from_uuid else list(self.pipeline.query("SELECT * FROM v0"))
-         )
-         print(
-             f"{self.pipeline.name}: final records before shutdown: {got_expected}",
-             file=sys.stderr,
-         )
-
-         # Step 5: Stop main and activate standby
-         self.pipeline.stop(force=True)
-
-         assert standby.status() == PipelineStatus.INITIALIZING
-         standby.activate(timeout_s=(pull_interval * extra_ckpts) + 60)
-
-         for log in standby.logs():
-             if "activated" in log:
-                 break
-             if time.monotonic() > end:
-                 raise TimeoutError("Timed out waiting for standby pipeline to activate")
-
-         # Step 6: Validate standby has all expected records
-         got_after = list(standby.query("SELECT * FROM v0"))
-         print(
-             f"{standby.name}: final records after activation: {got_after}",
-             file=sys.stderr,
-         )
-         self.assertCountEqual(got_expected, got_after)
-
-         # Cleanup
-         standby.stop(force=True)
-
-         standby.start()
-         got_final = list(standby.query("SELECT * FROM v0"))
-         standby.stop(force=True)
-
-         self.assertCountEqual(got_after, got_final)
-
-         self.pipeline.clear_storage()
-
-     @enterprise_only
-     def test_standby_fallback_from_uuid(self):
-         self.test_standby_fallback(from_uuid=True)