feldera 0.128.0__tar.gz → 0.129.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: the registry flags this version of feldera as potentially problematic.
- {feldera-0.128.0 → feldera-0.129.0}/PKG-INFO +18 -7
- {feldera-0.128.0 → feldera-0.129.0}/README.md +17 -6
- {feldera-0.128.0 → feldera-0.129.0}/feldera/pipeline.py +19 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/_httprequests.py +15 -5
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/config.py +3 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/feldera_client.py +31 -4
- {feldera-0.128.0 → feldera-0.129.0}/feldera/stats.py +3 -3
- {feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/PKG-INFO +18 -7
- {feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/SOURCES.txt +1 -0
- {feldera-0.128.0 → feldera-0.129.0}/pyproject.toml +1 -1
- feldera-0.129.0/tests/test_checkpoint_sync.py +320 -0
- feldera-0.129.0/tests/test_issue4457.py +57 -0
- {feldera-0.128.0 → feldera-0.129.0}/tests/test_shared_pipeline.py +11 -0
- feldera-0.128.0/tests/test_checkpoint_sync.py +0 -319
- {feldera-0.128.0 → feldera-0.129.0}/feldera/__init__.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/_callback_runner.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/_helpers.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/enums.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/output_handler.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/pipeline_builder.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/__init__.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/_helpers.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/errors.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/feldera_config.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/pipeline.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/sql_table.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/rest/sql_view.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera/runtime_config.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/dependency_links.txt +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/requires.txt +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/top_level.txt +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/setup.cfg +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/tests/test_pipeline_builder.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/tests/test_shared_pipeline_stress.py +0 -0
- {feldera-0.128.0 → feldera-0.129.0}/tests/test_udf.py +0 -0
{feldera-0.128.0 → feldera-0.129.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: feldera
-Version: 0.128.0
+Version: 0.129.0
 Summary: The feldera python client
 Author-email: Feldera Team <dev@feldera.com>
 License: MIT
@@ -28,19 +28,19 @@ Feldera Python is the Feldera SDK for Python developers.
 ## Installation

 ```bash
-pip install feldera
+uv pip install feldera
 ```

 ### Installing from Github

 ```bash
-pip install git+https://github.com/feldera/feldera#subdirectory=python
+uv pip install git+https://github.com/feldera/feldera#subdirectory=python
 ```

 Similarly, to install from a specific branch:

 ```bash
-
+uv pip install git+https://github.com/feldera/feldera@{BRANCH_NAME}#subdirectory=python
 ```

 Replace `{BRANCH_NAME}` with the name of the branch you want to install from.
@@ -51,7 +51,12 @@ If you have cloned the Feldera repo, you can install the python SDK as follows:

 ```bash
 # the Feldera Python SDK is present inside the python/ directory
-
+cd python
+# If you don't have a virtual environment, create one
+uv venv
+source .venv/activate
+# Install the SDK in editable mode
+uv pip install .
 ```

 ## Documentation
@@ -61,7 +66,7 @@ The Python SDK documentation is available at

 To build the html documentation run:

-Ensure that you have sphinx installed. If not, install it using `pip install sphinx`.
+Ensure that you have sphinx installed. If not, install it using `uv pip install sphinx`.

 Then run the following commands:

@@ -92,7 +97,13 @@ To run tests from a specific file:
 (cd python && python3 -m pytest ./tests/path-to-file.py)
 ```

-
+To run a specific test:
+
+```bash
+uv run python -m pytest tests/test_shared_pipeline.py::TestPipeline::test_adhoc_query_hash -v
+```
+
+#### Running All Tests

 The tests validate end-to-end correctness of SQL functionality. To
 run the tests use:
{feldera-0.128.0 → feldera-0.129.0}/README.md

@@ -5,19 +5,19 @@ Feldera Python is the Feldera SDK for Python developers.
 ## Installation

 ```bash
-pip install feldera
+uv pip install feldera
 ```

 ### Installing from Github

 ```bash
-pip install git+https://github.com/feldera/feldera#subdirectory=python
+uv pip install git+https://github.com/feldera/feldera#subdirectory=python
 ```

 Similarly, to install from a specific branch:

 ```bash
-
+uv pip install git+https://github.com/feldera/feldera@{BRANCH_NAME}#subdirectory=python
 ```

 Replace `{BRANCH_NAME}` with the name of the branch you want to install from.
@@ -28,7 +28,12 @@ If you have cloned the Feldera repo, you can install the python SDK as follows:

 ```bash
 # the Feldera Python SDK is present inside the python/ directory
-
+cd python
+# If you don't have a virtual environment, create one
+uv venv
+source .venv/activate
+# Install the SDK in editable mode
+uv pip install .
 ```

 ## Documentation
@@ -38,7 +43,7 @@ The Python SDK documentation is available at

 To build the html documentation run:

-Ensure that you have sphinx installed. If not, install it using `pip install sphinx`.
+Ensure that you have sphinx installed. If not, install it using `uv pip install sphinx`.

 Then run the following commands:

@@ -69,7 +74,13 @@ To run tests from a specific file:
 (cd python && python3 -m pytest ./tests/path-to-file.py)
 ```

-
+To run a specific test:
+
+```bash
+uv run python -m pytest tests/test_shared_pipeline.py::TestPipeline::test_adhoc_query_hash -v
+```
+
+#### Running All Tests

 The tests validate end-to-end correctness of SQL functionality. To
 run the tests use:
{feldera-0.128.0 → feldera-0.129.0}/feldera/pipeline.py

@@ -809,6 +809,25 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""

         return self.client.query_as_text(self.name, query)

+    def query_hash(self, query: str):
+        """
+        Executes an ad-hoc SQL query on this pipeline and returns the result
+        as a hash of the result set. This is useful for quickly checking
+        if the result set has changed without retrieving the entire result.
+
+        Note:
+            For a stable hash, the query must be deterministic which means
+            it should be sorted.
+
+        :param query: The SQL query to be executed.
+
+        :raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
+            state.
+        :raises FelderaAPIError: If querying a non materialized table or view.
+        :raises FelderaAPIError: If the query is invalid.
+        """
+        return self.client.query_as_hash(self.name, query)
+
     def execute(self, query: str):
         """
         Executes an ad-hoc SQL query on the current pipeline, discarding its
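For context, this is roughly how the new `Pipeline.query_hash` helper could be used from application code. A minimal sketch, assuming a locally running Feldera instance at `http://localhost:8080`; the pipeline name and schema are made up for illustration, while the SDK calls themselves (`PipelineBuilder`, `input_json`, `wait_for_idle`, `query_hash`) all appear elsewhere in this diff.

```python
# Minimal usage sketch of the new Pipeline.query_hash (URL, names and schema are
# assumptions for illustration; only query_hash itself is new in this release).
from feldera import PipelineBuilder
from feldera.rest.feldera_client import FelderaClient

client = FelderaClient("http://localhost:8080")
sql = """
CREATE TABLE t (id INT);
CREATE MATERIALIZED VIEW v AS SELECT * FROM t;
"""
pipeline = PipelineBuilder(client, name="hash_demo", sql=sql).create_or_replace()
pipeline.start()

pipeline.input_json("t", [{"id": 1}, {"id": 2}])
pipeline.wait_for_idle()

# ORDER BY keeps the result deterministic, which the docstring above requires
# for a stable hash.
h1 = pipeline.query_hash("SELECT * FROM v ORDER BY id")
h2 = pipeline.query_hash("SELECT * FROM v ORDER BY id")
assert h1 == h2  # unchanged data, identical digest

pipeline.stop(force=True)
```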
{feldera-0.128.0 → feldera-0.129.0}/feldera/rest/_httprequests.py

@@ -57,6 +57,7 @@ class HttpRequests:
             self.headers["Content-Type"] = content_type

         try:
+            conn_timeout = self.config.connection_timeout
             timeout = self.config.timeout
             headers = self.headers

@@ -74,7 +75,7 @@ class HttpRequests:
                 if http_method.__name__ == "get":
                     request = http_method(
                         request_path,
-                        timeout=timeout,
+                        timeout=(conn_timeout, timeout),
                         headers=headers,
                         params=params,
                         stream=stream,
@@ -83,7 +84,7 @@ class HttpRequests:
                 elif isinstance(body, bytes):
                     request = http_method(
                         request_path,
-                        timeout=timeout,
+                        timeout=(conn_timeout, timeout),
                         headers=headers,
                         data=body,
                         params=params,
@@ -93,7 +94,7 @@ class HttpRequests:
                 else:
                     request = http_method(
                         request_path,
-                        timeout=timeout,
+                        timeout=(conn_timeout, timeout),
                         headers=headers,
                         data=json_serialize(body) if serialize else body,
                         params=params,
@@ -118,9 +119,18 @@ class HttpRequests:
                     time.sleep(2)  # backoff, adjust as needed
                     continue
                 raise  # re-raise for all other errors or if out of retries
+            except requests.exceptions.Timeout as err:
+                if attempt < max_retries:
+                    logging.warning(
+                        "HTTP Connection Timeout for %s, retrying (%d/%d)...",
+                        path,
+                        attempt + 1,
+                        max_retries,
+                    )
+                    time.sleep(2)
+                    continue
+                raise FelderaTimeoutError(str(err)) from err

-            except requests.exceptions.Timeout as err:
-                raise FelderaTimeoutError(str(err)) from err
             except requests.exceptions.ConnectionError as err:
                 raise FelderaCommunicationError(str(err)) from err

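The `timeout=(conn_timeout, timeout)` change uses the tuple form supported by the `requests` library, where the first element bounds connection establishment and the second bounds waiting for response data. A standalone illustration of that behavior, with a placeholder URL:

```python
# (connect, read) timeout tuple as used in the diff above: fail fast if the
# server cannot be reached, but allow long-running responses once connected.
import requests

resp = requests.get(
    "http://localhost:8080/v0/pipelines",  # placeholder endpoint
    timeout=(3.0, 30.0),  # 3s to establish the connection, 30s per read
)
print(resp.status_code)
```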
{feldera-0.128.0 → feldera-0.129.0}/feldera/rest/config.py

@@ -12,6 +12,7 @@ class Config:
         api_key: Optional[str] = None,
         version: Optional[str] = None,
         timeout: Optional[float] = None,
+        connection_timeout: Optional[float] = None,
         requests_verify: bool = True,
     ) -> None:
         """
@@ -19,6 +20,7 @@ class Config:
         :param api_key: The optional API key to access Feldera
         :param version: The version of the API to use
         :param timeout: The timeout for the HTTP requests
+        :param connection_timeout: The connection timeout for the HTTP requests
         :param requests_verify: The `verify` parameter passed to the requests
             library. `True` by default.
         """
@@ -27,4 +29,5 @@ class Config:
         self.api_key: Optional[str] = api_key
         self.version: Optional[str] = version or "v0"
         self.timeout: Optional[float] = timeout
+        self.connection_timeout: Optional[float] = connection_timeout
         self.requests_verify: bool = requests_verify
{feldera-0.128.0 → feldera-0.129.0}/feldera/rest/feldera_client.py

@@ -46,20 +46,26 @@ class FelderaClient:
         url: str,
         api_key: Optional[str] = None,
         timeout: Optional[float] = None,
+        connection_timeout: Optional[float] = None,
         requests_verify: bool = True,
     ) -> None:
         """
         :param url: The url to Feldera API (ex: https://try.feldera.com)
         :param api_key: The optional API key for Feldera
-        :param timeout: (optional) The amount of time in seconds that the
-            will wait for a response before timing
-
+        :param timeout: (optional) The amount of time in seconds that the
+            client will wait for a response before timing out.
+        :param connection_timeout: (optional) The amount of time in seconds that
+            the client will wait to establish connection before timing out.
         :param requests_verify: The `verify` parameter passed to the requests
             library. `True` by default.
         """

         self.config = Config(
-            url,
+            url,
+            api_key,
+            timeout=timeout,
+            connection_timeout=connection_timeout,
+            requests_verify=requests_verify,
         )
         self.http = HttpRequests(self.config)

@@ -895,6 +901,27 @@ Reason: The pipeline is in a STOPPED state due to the following error:
                 if chunk:
                     yield chunk.decode("utf-8")

+    def query_as_hash(self, pipeline_name: str, query: str) -> str:
+        """
+        Executes an ad-hoc query on the specified pipeline and returns a hash of the result.
+
+        :param pipeline_name: The name of the pipeline to query.
+        :param query: The SQL query to be executed.
+        :return: A string containing the hash of the query result.
+        """
+        params = {
+            "pipeline_name": pipeline_name,
+            "sql": query,
+            "format": "hash",
+        }
+
+        resp = self.http.get(
+            path=f"/pipelines/{pipeline_name}/query",
+            params=params,
+            stream=False,
+        )
+        return resp
+
     def query_as_parquet(self, pipeline_name: str, query: str, path: str):
         """
         Executes an ad-hoc query on the specified pipeline and saves the result to a parquet file.
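Putting the two client-side additions together: the new `connection_timeout` constructor argument and `query_as_hash`. A hedged sketch; the URL, pipeline and view names are placeholders, and the pipeline is assumed to already be running with a materialized view.

```python
# Sketch combining connection_timeout and query_as_hash (both added in 0.129.0).
# URL and object names are placeholders.
from feldera.rest.feldera_client import FelderaClient

client = FelderaClient(
    "http://localhost:8080",
    timeout=60,            # seconds to wait for a response
    connection_timeout=5,  # seconds to wait while establishing the connection
)

digest = client.query_as_hash("my_pipeline", "SELECT * FROM my_view ORDER BY 1")
print(digest)  # uppercase hex digest of the result set, per the test below
```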
{feldera-0.128.0 → feldera-0.129.0}/feldera/stats.py

@@ -26,8 +26,8 @@ class PipelineStatistics:
         pipeline.inputs = [
             InputEndpointStatus.from_dict(input) for input in d["inputs"]
         ]
-        pipeline.
-            OutputEndpointStatus
+        pipeline.outputs = [
+            OutputEndpointStatus.from_dict(output) for output in d["outputs"]
         ]
         return pipeline

@@ -46,7 +46,7 @@ class GlobalPipelineMetrics:
         self.rss_bytes: Optional[int] = None
         self.cpu_msecs: Optional[int] = None
         self.start_time: Optional[datetime] = None
-        self.incarnation_uuid: Optional[uuid] = None
+        self.incarnation_uuid: Optional[uuid.UUID] = None
         self.storage_bytes: Optional[int] = None
         self.storage_mb_secs: Optional[int] = None
         self.runtime_elapsed_msecs: Optional[int] = None
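The second hunk is a type-annotation fix: `uuid` is the module, while the stored value is a `uuid.UUID` instance. A tiny illustration of the distinction, borrowing the attribute name from the code above:

```python
# uuid (module) vs uuid.UUID (type): only the latter is a meaningful annotation
# for a UUID value such as incarnation_uuid above.
import uuid
from typing import Optional

incarnation_uuid: Optional[uuid.UUID] = None
incarnation_uuid = uuid.uuid4()                 # a uuid.UUID instance
print(isinstance(incarnation_uuid, uuid.UUID))  # True
```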
{feldera-0.128.0 → feldera-0.129.0}/feldera.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: feldera
-Version: 0.128.0
+Version: 0.129.0
 Summary: The feldera python client
 Author-email: Feldera Team <dev@feldera.com>
 License: MIT
@@ -28,19 +28,19 @@ Feldera Python is the Feldera SDK for Python developers.
 ## Installation

 ```bash
-pip install feldera
+uv pip install feldera
 ```

 ### Installing from Github

 ```bash
-pip install git+https://github.com/feldera/feldera#subdirectory=python
+uv pip install git+https://github.com/feldera/feldera#subdirectory=python
 ```

 Similarly, to install from a specific branch:

 ```bash
-
+uv pip install git+https://github.com/feldera/feldera@{BRANCH_NAME}#subdirectory=python
 ```

 Replace `{BRANCH_NAME}` with the name of the branch you want to install from.
@@ -51,7 +51,12 @@ If you have cloned the Feldera repo, you can install the python SDK as follows:

 ```bash
 # the Feldera Python SDK is present inside the python/ directory
-
+cd python
+# If you don't have a virtual environment, create one
+uv venv
+source .venv/activate
+# Install the SDK in editable mode
+uv pip install .
 ```

 ## Documentation
@@ -61,7 +66,7 @@ The Python SDK documentation is available at

 To build the html documentation run:

-Ensure that you have sphinx installed. If not, install it using `pip install sphinx`.
+Ensure that you have sphinx installed. If not, install it using `uv pip install sphinx`.

 Then run the following commands:

@@ -92,7 +97,13 @@ To run tests from a specific file:
 (cd python && python3 -m pytest ./tests/path-to-file.py)
 ```

-
+To run a specific test:
+
+```bash
+uv run python -m pytest tests/test_shared_pipeline.py::TestPipeline::test_adhoc_query_hash -v
+```
+
+#### Running All Tests

 The tests validate end-to-end correctness of SQL functionality. To
 run the tests use:
feldera-0.129.0/tests/test_checkpoint_sync.py (new file)

@@ -0,0 +1,320 @@
+# from tests.shared_test_pipeline import SharedTestPipeline
+# from tests import enterprise_only
+# from feldera.runtime_config import RuntimeConfig, Storage
+# from feldera.enums import PipelineStatus, FaultToleranceModel
+# from typing import Optional
+# import os
+# import sys
+# import time
+# from uuid import uuid4
+# import random
+# import pytest
+#
+#
+# DEFAULT_ENDPOINT = os.environ.get(
+#     "DEFAULT_MINIO_ENDPOINT", "http://minio.extra.svc.cluster.local:9000"
+# )
+# DEFAULT_BUCKET = "default"
+# ACCESS_KEY = "minio"
+# SECRET_KEY = "miniopasswd"
+#
+#
+# def storage_cfg(
+#     pipeline_name: str,
+#     endpoint: Optional[str] = None,
+#     start_from_checkpoint: Optional[str] = None,
+#     strict: bool = False,
+#     auth_err: bool = False,
+#     standby: bool = False,
+#     pull_interval: int = 2,
+# ) -> dict:
+#     return {
+#         "backend": {
+#             "name": "file",
+#             "config": {
+#                 "sync": {
+#                     "bucket": f"{DEFAULT_BUCKET}/{pipeline_name}",
+#                     "access_key": ACCESS_KEY,
+#                     "secret_key": SECRET_KEY if not auth_err else SECRET_KEY + "extra",
+#                     "provider": "Minio",
+#                     "endpoint": endpoint or DEFAULT_ENDPOINT,
+#                     "start_from_checkpoint": start_from_checkpoint,
+#                     "fail_if_no_checkpoint": strict,
+#                     "standby": standby,
+#                     "pull_interval": pull_interval,
+#                 }
+#             },
+#         }
+#     }
+#
+#
+# class TestCheckpointSync(SharedTestPipeline):
+#     @enterprise_only
+#     def test_checkpoint_sync(
+#         self,
+#         from_uuid: bool = False,
+#         random_uuid: bool = False,
+#         clear_storage: bool = True,
+#         auth_err: bool = False,
+#         strict: bool = False,
+#         expect_empty: bool = False,
+#         standby: bool = False,
+#     ):
+#         """
+#         CREATE TABLE t0 (c0 INT, c1 VARCHAR);
+#         CREATE MATERIALIZED VIEW v0 AS SELECT * FROM t0;
+#         """
+#
+#         storage_config = storage_cfg(self.pipeline.name)
+#         ft = FaultToleranceModel.AtLeastOnce
+#
+#         self.pipeline.set_runtime_config(
+#             RuntimeConfig(
+#                 fault_tolerance_model=ft, storage=Storage(config=storage_config)
+#             )
+#         )
+#         self.pipeline.start()
+#
+#         random.seed(time.time())
+#         total = random.randint(10, 20)
+#         data = [{"c0": i, "c1": str(i)} for i in range(1, total)]
+#         self.pipeline.input_json("t0", data)
+#         self.pipeline.execute("INSERT INTO t0 VALUES (21, 'exists')")
+#
+#         start = time.time()
+#         timeout = 5
+#
+#         while True:
+#             processed = self.pipeline.stats().global_metrics.total_processed_records
+#             if processed == total:
+#                 break
+#
+#             if time.time() - start > timeout:
+#                 raise TimeoutError(
+#                     f"timed out while waiting for pipeline to process {total} records"
+#                 )
+#
+#             time.sleep(0.1)
+#
+#         got_before = list(self.pipeline.query("SELECT * FROM v0"))
+#         print(f"{self.pipeline.name}: records: {total}, {got_before}", file=sys.stderr)
+#
+#         if len(got_before) != processed:
+#             raise RuntimeError(
+#                 f"adhoc query returned {len(got_before)} but {processed} records were processed: {got_before}"
+#             )
+#
+#         self.pipeline.checkpoint(wait=True)
+#         uuid = self.pipeline.sync_checkpoint(wait=True)
+#
+#         self.pipeline.stop(force=True)
+#
+#         if clear_storage:
+#             self.pipeline.clear_storage()
+#
+#         if random_uuid:
+#             uuid = uuid4()
+#
+#         # Restart pipeline from checkpoint
+#         storage_config = storage_cfg(
+#             pipeline_name=self.pipeline.name,
+#             start_from_checkpoint=uuid if from_uuid else "latest",
+#             auth_err=auth_err,
+#             strict=strict,
+#             standby=standby,
+#         )
+#         self.pipeline.set_runtime_config(
+#             RuntimeConfig(
+#                 fault_tolerance_model=ft, storage=Storage(config=storage_config)
+#             )
+#         )
+#
+#         if not standby:
+#             self.pipeline.start()
+#         else:
+#             self.pipeline.start(wait=False)
+#
+#         # wait for the pipeline to initialize
+#         start = time.monotonic()
+#         # wait for a maximum of 120 seconds for the pipeline to provison
+#         end = start + 120
+#
+#         # wait for the pipeline to finish provisoning
+#         for log in self.pipeline.logs():
+#             if "checkpoint pulled successfully" in log:
+#                 break
+#
+#             if time.monotonic() > end:
+#                 raise TimeoutError(
+#                     f"{self.pipeline.name} timedout waiting to pull checkpoint"
+#                 )
+#
+#         if standby:
+#             # wait for 8 seconds, this should be more than enough time
+#             time.sleep(8)
+#             assert self.pipeline.status() == PipelineStatus.INITIALIZING
+#
+#             self.pipeline.activate(timeout_s=10)
+#
+#         got_after = list(self.pipeline.query("SELECT * FROM v0"))
+#
+#         print(
+#             f"{self.pipeline.name}: after: {len(got_after)}, {got_after}",
+#             file=sys.stderr,
+#         )
+#
+#         if expect_empty:
+#             got_before = []
+#
+#         self.assertCountEqual(got_before, got_after)
+#
+#         self.pipeline.stop(force=True)
+#
+#         if clear_storage:
+#             self.pipeline.clear_storage()
+#
+#     @enterprise_only
+#     def test_from_uuid(self):
+#         self.test_checkpoint_sync(from_uuid=True)
+#
+#     @enterprise_only
+#     def test_without_clearing_storage(self):
+#         self.test_checkpoint_sync(clear_storage=False)
+#
+#     @enterprise_only
+#     def test_autherr_fail(self):
+#         with self.assertRaisesRegex(RuntimeError, "SignatureDoesNotMatch"):
+#             self.test_checkpoint_sync(auth_err=True, strict=True)
+#
+#     @enterprise_only
+#     def test_autherr(self):
+#         self.test_checkpoint_sync(auth_err=True, strict=False, expect_empty=True)
+#
+#     @enterprise_only
+#     def test_nonexistent_checkpoint_fail(self):
+#         with self.assertRaisesRegex(RuntimeError, "were not found in source"):
+#             self.test_checkpoint_sync(random_uuid=True, from_uuid=True, strict=True)
+#
+#     @enterprise_only
+#     def test_nonexistent_checkpoint(self):
+#         self.test_checkpoint_sync(random_uuid=True, from_uuid=True, expect_empty=True)
+#
+#     @enterprise_only
+#     def test_standby_activation(self):
+#         self.test_checkpoint_sync(standby=True)
+#
+#     @enterprise_only
+#     def test_standby_activation_from_uuid(self):
+#         self.test_checkpoint_sync(standby=True, from_uuid=True)
+#
+#     @enterprise_only
+#     def test_standby_fallback(self, from_uuid: bool = False):
+#         # Step 1: Start main pipeline
+#         storage_config = storage_cfg(self.pipeline.name)
+#         ft = FaultToleranceModel.AtLeastOnce
+#         self.pipeline.set_runtime_config(
+#             RuntimeConfig(
+#                 fault_tolerance_model=ft, storage=Storage(config=storage_config)
+#             )
+#         )
+#         self.pipeline.start()
+#
+#         # Insert initial data
+#         random.seed(time.time())
+#         total_initial = random.randint(10, 20)
+#         data_initial = [{"c0": i, "c1": str(i)} for i in range(1, total_initial)]
+#         self.pipeline.input_json("t0", data_initial)
+#         self.pipeline.execute("INSERT INTO t0 VALUES (21, 'exists')")
+#         self.pipeline.wait_for_completion()
+#
+#         got_before = list(self.pipeline.query("select * from v0"))
+#
+#         # Step 2: Create checkpoint and sync
+#         self.pipeline.checkpoint(wait=True)
+#         uuid = self.pipeline.sync_checkpoint(wait=True)
+#
+#         # Step 3: Start standby pipeline
+#         standby = self.new_pipeline_with_suffix("standby")
+#         pull_interval = 1
+#         standby.set_runtime_config(
+#             RuntimeConfig(
+#                 fault_tolerance_model=ft,
+#                 storage=Storage(
+#                     config=storage_cfg(
+#                         self.pipeline.name,
+#                         start_from_checkpoint=uuid if from_uuid else "latest",
+#                         standby=True,
+#                         pull_interval=pull_interval,
+#                     )
+#                 ),
+#             )
+#         )
+#         standby.start(wait=False)
+#
+#         # Wait until standby pulls the first checkpoint
+#         start = time.monotonic()
+#         end = start + 120
+#         for log in standby.logs():
+#             if "checkpoint pulled successfully" in log:
+#                 break
+#             if time.monotonic() > end:
+#                 raise TimeoutError(
+#                     "Timed out waiting for standby pipeline to pull checkpoint"
+#                 )
+#
+#         # Step 4: Add more data and make 3-10 checkpoints
+#         extra_ckpts = random.randint(3, 10)
+#         total_additional = 0
+#
+#         for i in range(extra_ckpts):
+#             new_val = 100 + i
+#             new_data = [{"c0": new_val, "c1": f"extra_{new_val}"}]
+#             self.pipeline.input_json("t0", new_data)
+#             self.pipeline.wait_for_completion()
+#             total_additional += 1
+#             self.pipeline.checkpoint(wait=True)
+#             self.pipeline.sync_checkpoint(wait=True)
+#             time.sleep(0.2)
+#
+#         got_expected = (
+#             got_before if from_uuid else list(self.pipeline.query("SELECT * FROM v0"))
+#         )
+#         print(
+#             f"{self.pipeline.name}: final records before shutdown: {got_expected}",
+#             file=sys.stderr,
+#         )
+#
+#         # Step 5: Stop main and activate standby
+#         self.pipeline.stop(force=True)
+#
+#         assert standby.status() == PipelineStatus.INITIALIZING
+#         standby.activate(timeout_s=(pull_interval * extra_ckpts) + 60)
+#
+#         for log in standby.logs():
+#             if "activated" in log:
+#                 break
+#             if time.monotonic() > end:
+#                 raise TimeoutError("Timed out waiting for standby pipeline to activate")
+#
+#         # Step 6: Validate standby has all expected records
+#         got_after = list(standby.query("SELECT * FROM v0"))
+#         print(
+#             f"{standby.name}: final records after activation: {got_after}",
+#             file=sys.stderr,
+#         )
+#         self.assertCountEqual(got_expected, got_after)
+#
+#         # Cleanup
+#         standby.stop(force=True)
+#
+#         standby.start()
+#         got_final = list(standby.query("SELECT * FROM v0"))
+#         standby.stop(force=True)
+#
+#         self.assertCountEqual(got_after, got_final)
+#
+#         self.pipeline.clear_storage()
+#
+#     @enterprise_only
+#     def test_standby_fallback_from_uuid(self):
+#         self.test_standby_fallback(from_uuid=True)
feldera-0.129.0/tests/test_issue4457.py (new file)

@@ -0,0 +1,57 @@
+import unittest
+
+from pandas import Timestamp
+from feldera import PipelineBuilder
+from tests import TEST_CLIENT
+
+
+class TestIssue_4457(unittest.TestCase):
+    def test_local(self):
+        sql = """
+        CREATE TABLE test_events (
+            id VARCHAR NOT NULL PRIMARY KEY,
+            a VARCHAR,
+            t TIMESTAMP NOT NULL LATENESS INTERVAL 1 MINUTE
+        );
+        CREATE VIEW V AS SELECT * FROM test_events;
+        """
+
+        pipeline = PipelineBuilder(
+            TEST_CLIENT, name="test_issue4457", sql=sql
+        ).create_or_replace()
+
+        # TODO: use .query() instead
+        out = pipeline.listen("v")
+
+        pipeline.start()
+
+        pipeline.input_json(
+            "test_events",
+            [{"id": "a", "a": "test4", "t": "2025-05-20 21:00:17.920"}],
+        )
+        pipeline.wait_for_idle()
+
+        output = out.to_dict()
+        assert output == [
+            {
+                "id": "a",
+                "a": "test4",
+                "t": Timestamp("2025-05-20 21:00:17.920"),
+                "insert_delete": 1,
+            }
+        ]
+
+        pipeline.input_json(
+            "test_events",
+            [{"id": "a", "a": "test5", "t": "2025-03-20 21:00:17.920"}],
+        )
+        pipeline.wait_for_idle()
+
+        output = out.to_dict()
+        assert output == []
+
+        pipeline.stop(force=True)
+
+
+if __name__ == "__main__":
+    unittest.main()
{feldera-0.128.0 → feldera-0.129.0}/tests/test_shared_pipeline.py

@@ -84,6 +84,17 @@ class TestPipeline(SharedTestPipeline):
         got = "\n".join(resp)
         assert got in expected

+    def test_adhoc_query_hash(self):
+        data = "1\n2\n"
+        self.pipeline.start()
+        TEST_CLIENT.push_to_pipeline(self.pipeline.name, "tbl", "csv", data)
+        resp = TEST_CLIENT.query_as_hash(
+            self.pipeline.name, "SELECT * FROM tbl ORDER BY id"
+        )
+        assert (
+            resp == "0B021466CA428474EF16F899D0F841D7338C168C063DA5DB43666D1AB3081558"
+        )
+
     def test_adhoc_query_parquet(self):
         data = "1\n2\n"
         self.pipeline.start()
feldera-0.128.0/tests/test_checkpoint_sync.py (deleted)

@@ -1,319 +0,0 @@
-from tests.shared_test_pipeline import SharedTestPipeline
-from tests import enterprise_only
-from feldera.runtime_config import RuntimeConfig, Storage
-from feldera.enums import PipelineStatus, FaultToleranceModel
-from typing import Optional
-import os
-import sys
-import time
-from uuid import uuid4
-import random
-
-
-DEFAULT_ENDPOINT = os.environ.get(
-    "DEFAULT_MINIO_ENDPOINT", "http://minio.extra.svc.cluster.local:9000"
-)
-DEFAULT_BUCKET = "default"
-ACCESS_KEY = "minio"
-SECRET_KEY = "miniopasswd"
-
-
-def storage_cfg(
-    pipeline_name: str,
-    endpoint: Optional[str] = None,
-    start_from_checkpoint: Optional[str] = None,
-    strict: bool = False,
-    auth_err: bool = False,
-    standby: bool = False,
-    pull_interval: int = 2,
-) -> dict:
-    return {
-        "backend": {
-            "name": "file",
-            "config": {
-                "sync": {
-                    "bucket": f"{DEFAULT_BUCKET}/{pipeline_name}",
-                    "access_key": ACCESS_KEY,
-                    "secret_key": SECRET_KEY if not auth_err else SECRET_KEY + "extra",
-                    "provider": "Minio",
-                    "endpoint": endpoint or DEFAULT_ENDPOINT,
-                    "start_from_checkpoint": start_from_checkpoint,
-                    "fail_if_no_checkpoint": strict,
-                    "standby": standby,
-                    "pull_interval": pull_interval,
-                }
-            },
-        }
-    }
-
-
-class TestCheckpointSync(SharedTestPipeline):
-    @enterprise_only
-    def test_checkpoint_sync(
-        self,
-        from_uuid: bool = False,
-        random_uuid: bool = False,
-        clear_storage: bool = True,
-        auth_err: bool = False,
-        strict: bool = False,
-        expect_empty: bool = False,
-        standby: bool = False,
-    ):
-        """
-        CREATE TABLE t0 (c0 INT, c1 VARCHAR);
-        CREATE MATERIALIZED VIEW v0 AS SELECT * FROM t0;
-        """
-
-        storage_config = storage_cfg(self.pipeline.name)
-        ft = FaultToleranceModel.AtLeastOnce
-
-        self.pipeline.set_runtime_config(
-            RuntimeConfig(
-                fault_tolerance_model=ft, storage=Storage(config=storage_config)
-            )
-        )
-        self.pipeline.start()
-
-        random.seed(time.time())
-        total = random.randint(10, 20)
-        data = [{"c0": i, "c1": str(i)} for i in range(1, total)]
-        self.pipeline.input_json("t0", data)
-        self.pipeline.execute("INSERT INTO t0 VALUES (21, 'exists')")
-
-        start = time.time()
-        timeout = 5
-
-        while True:
-            processed = self.pipeline.stats().global_metrics.total_processed_records
-            if processed == total:
-                break
-
-            if time.time() - start > timeout:
-                raise TimeoutError(
-                    f"timed out while waiting for pipeline to process {total} records"
-                )
-
-            time.sleep(0.1)
-
-        got_before = list(self.pipeline.query("SELECT * FROM v0"))
-        print(f"{self.pipeline.name}: records: {total}, {got_before}", file=sys.stderr)
-
-        if len(got_before) != processed:
-            raise RuntimeError(
-                f"adhoc query returned {len(got_before)} but {processed} records were processed: {got_before}"
-            )
-
-        self.pipeline.checkpoint(wait=True)
-        uuid = self.pipeline.sync_checkpoint(wait=True)
-
-        self.pipeline.stop(force=True)
-
-        if clear_storage:
-            self.pipeline.clear_storage()
-
-        if random_uuid:
-            uuid = uuid4()
-
-        # Restart pipeline from checkpoint
-        storage_config = storage_cfg(
-            pipeline_name=self.pipeline.name,
-            start_from_checkpoint=uuid if from_uuid else "latest",
-            auth_err=auth_err,
-            strict=strict,
-            standby=standby,
-        )
-        self.pipeline.set_runtime_config(
-            RuntimeConfig(
-                fault_tolerance_model=ft, storage=Storage(config=storage_config)
-            )
-        )
-
-        if not standby:
-            self.pipeline.start()
-        else:
-            self.pipeline.start(wait=False)
-
-        # wait for the pipeline to initialize
-        start = time.monotonic()
-        # wait for a maximum of 120 seconds for the pipeline to provison
-        end = start + 120
-
-        # wait for the pipeline to finish provisoning
-        for log in self.pipeline.logs():
-            if "checkpoint pulled successfully" in log:
-                break
-
-            if time.monotonic() > end:
-                raise TimeoutError(
-                    f"{self.pipeline.name} timedout waiting to pull checkpoint"
-                )
-
-        if standby:
-            # wait for 8 seconds, this should be more than enough time
-            time.sleep(8)
-            assert self.pipeline.status() == PipelineStatus.INITIALIZING
-
-            self.pipeline.activate(timeout_s=10)
-
-        got_after = list(self.pipeline.query("SELECT * FROM v0"))
-
-        print(
-            f"{self.pipeline.name}: after: {len(got_after)}, {got_after}",
-            file=sys.stderr,
-        )
-
-        if expect_empty:
-            got_before = []
-
-        self.assertCountEqual(got_before, got_after)
-
-        self.pipeline.stop(force=True)
-
-        if clear_storage:
-            self.pipeline.clear_storage()
-
-    @enterprise_only
-    def test_from_uuid(self):
-        self.test_checkpoint_sync(from_uuid=True)
-
-    @enterprise_only
-    def test_without_clearing_storage(self):
-        self.test_checkpoint_sync(clear_storage=False)
-
-    @enterprise_only
-    def test_autherr_fail(self):
-        with self.assertRaisesRegex(RuntimeError, "SignatureDoesNotMatch"):
-            self.test_checkpoint_sync(auth_err=True, strict=True)
-
-    @enterprise_only
-    def test_autherr(self):
-        self.test_checkpoint_sync(auth_err=True, strict=False, expect_empty=True)
-
-    @enterprise_only
-    def test_nonexistent_checkpoint_fail(self):
-        with self.assertRaisesRegex(RuntimeError, "were not found in source"):
-            self.test_checkpoint_sync(random_uuid=True, from_uuid=True, strict=True)
-
-    @enterprise_only
-    def test_nonexistent_checkpoint(self):
-        self.test_checkpoint_sync(random_uuid=True, from_uuid=True, expect_empty=True)
-
-    @enterprise_only
-    def test_standby_activation(self):
-        self.test_checkpoint_sync(standby=True)
-
-    @enterprise_only
-    def test_standby_activation_from_uuid(self):
-        self.test_checkpoint_sync(standby=True, from_uuid=True)
-
-    @enterprise_only
-    def test_standby_fallback(self, from_uuid: bool = False):
-        # Step 1: Start main pipeline
-        storage_config = storage_cfg(self.pipeline.name)
-        ft = FaultToleranceModel.AtLeastOnce
-        self.pipeline.set_runtime_config(
-            RuntimeConfig(
-                fault_tolerance_model=ft, storage=Storage(config=storage_config)
-            )
-        )
-        self.pipeline.start()
-
-        # Insert initial data
-        random.seed(time.time())
-        total_initial = random.randint(10, 20)
-        data_initial = [{"c0": i, "c1": str(i)} for i in range(1, total_initial)]
-        self.pipeline.input_json("t0", data_initial)
-        self.pipeline.execute("INSERT INTO t0 VALUES (21, 'exists')")
-        self.pipeline.wait_for_completion()
-
-        got_before = list(self.pipeline.query("select * from v0"))
-
-        # Step 2: Create checkpoint and sync
-        self.pipeline.checkpoint(wait=True)
-        uuid = self.pipeline.sync_checkpoint(wait=True)
-
-        # Step 3: Start standby pipeline
-        standby = self.new_pipeline_with_suffix("standby")
-        pull_interval = 1
-        standby.set_runtime_config(
-            RuntimeConfig(
-                fault_tolerance_model=ft,
-                storage=Storage(
-                    config=storage_cfg(
-                        self.pipeline.name,
-                        start_from_checkpoint=uuid if from_uuid else "latest",
-                        standby=True,
-                        pull_interval=pull_interval,
-                    )
-                ),
-            )
-        )
-        standby.start(wait=False)
-
-        # Wait until standby pulls the first checkpoint
-        start = time.monotonic()
-        end = start + 120
-        for log in standby.logs():
-            if "checkpoint pulled successfully" in log:
-                break
-            if time.monotonic() > end:
-                raise TimeoutError(
-                    "Timed out waiting for standby pipeline to pull checkpoint"
-                )
-
-        # Step 4: Add more data and make 3-10 checkpoints
-        extra_ckpts = random.randint(3, 10)
-        total_additional = 0
-
-        for i in range(extra_ckpts):
-            new_val = 100 + i
-            new_data = [{"c0": new_val, "c1": f"extra_{new_val}"}]
-            self.pipeline.input_json("t0", new_data)
-            self.pipeline.wait_for_completion()
-            total_additional += 1
-            self.pipeline.checkpoint(wait=True)
-            self.pipeline.sync_checkpoint(wait=True)
-            time.sleep(0.2)
-
-        got_expected = (
-            got_before if from_uuid else list(self.pipeline.query("SELECT * FROM v0"))
-        )
-        print(
-            f"{self.pipeline.name}: final records before shutdown: {got_expected}",
-            file=sys.stderr,
-        )
-
-        # Step 5: Stop main and activate standby
-        self.pipeline.stop(force=True)
-
-        assert standby.status() == PipelineStatus.INITIALIZING
-        standby.activate(timeout_s=(pull_interval * extra_ckpts) + 60)
-
-        for log in standby.logs():
-            if "activated" in log:
-                break
-            if time.monotonic() > end:
-                raise TimeoutError("Timed out waiting for standby pipeline to activate")
-
-        # Step 6: Validate standby has all expected records
-        got_after = list(standby.query("SELECT * FROM v0"))
-        print(
-            f"{standby.name}: final records after activation: {got_after}",
-            file=sys.stderr,
-        )
-        self.assertCountEqual(got_expected, got_after)
-
-        # Cleanup
-        standby.stop(force=True)
-
-        standby.start()
-        got_final = list(standby.query("SELECT * FROM v0"))
-        standby.stop(force=True)
-
-        self.assertCountEqual(got_after, got_final)
-
-        self.pipeline.clear_storage()
-
-    @enterprise_only
-    def test_standby_fallback_from_uuid(self):
-        self.test_standby_fallback(from_uuid=True)
The remaining files listed above (marked +0 -0) are unchanged between the two versions.