feldera 0.117.0__tar.gz → 0.119.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of feldera might be problematic; see the advisory details accompanying this release.

Files changed (33)
  1. {feldera-0.117.0 → feldera-0.119.0}/PKG-INFO +2 -2
  2. {feldera-0.117.0 → feldera-0.119.0}/README.md +1 -1
  3. {feldera-0.117.0 → feldera-0.119.0}/feldera/pipeline.py +9 -4
  4. {feldera-0.117.0 → feldera-0.119.0}/feldera/pipeline_builder.py +3 -2
  5. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/feldera_client.py +79 -4
  6. {feldera-0.117.0 → feldera-0.119.0}/feldera/stats.py +1 -1
  7. {feldera-0.117.0 → feldera-0.119.0}/feldera.egg-info/PKG-INFO +2 -2
  8. {feldera-0.117.0 → feldera-0.119.0}/feldera.egg-info/SOURCES.txt +2 -2
  9. {feldera-0.117.0 → feldera-0.119.0}/pyproject.toml +3 -1
  10. feldera-0.117.0/tests/test_shared_pipeline1.py → feldera-0.119.0/tests/test_checkpoint_sync.py +43 -7
  11. feldera-0.117.0/tests/test_shared_pipeline0.py → feldera-0.119.0/tests/test_shared_pipeline.py +2 -13
  12. {feldera-0.117.0 → feldera-0.119.0}/feldera/__init__.py +0 -0
  13. {feldera-0.117.0 → feldera-0.119.0}/feldera/_callback_runner.py +0 -0
  14. {feldera-0.117.0 → feldera-0.119.0}/feldera/_helpers.py +0 -0
  15. {feldera-0.117.0 → feldera-0.119.0}/feldera/enums.py +0 -0
  16. {feldera-0.117.0 → feldera-0.119.0}/feldera/output_handler.py +0 -0
  17. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/__init__.py +0 -0
  18. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/_helpers.py +0 -0
  19. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/_httprequests.py +0 -0
  20. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/config.py +0 -0
  21. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/errors.py +0 -0
  22. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/feldera_config.py +0 -0
  23. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/pipeline.py +0 -0
  24. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/sql_table.py +0 -0
  25. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/sql_view.py +0 -0
  26. {feldera-0.117.0 → feldera-0.119.0}/feldera/runtime_config.py +0 -0
  27. {feldera-0.117.0 → feldera-0.119.0}/feldera.egg-info/dependency_links.txt +0 -0
  28. {feldera-0.117.0 → feldera-0.119.0}/feldera.egg-info/requires.txt +0 -0
  29. {feldera-0.117.0 → feldera-0.119.0}/feldera.egg-info/top_level.txt +0 -0
  30. {feldera-0.117.0 → feldera-0.119.0}/setup.cfg +0 -0
  31. {feldera-0.117.0 → feldera-0.119.0}/tests/test_pipeline_builder.py +0 -0
  32. {feldera-0.117.0 → feldera-0.119.0}/tests/test_shared_pipeline_stress.py +0 -0
  33. {feldera-0.117.0 → feldera-0.119.0}/tests/test_udf.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: feldera
3
- Version: 0.117.0
3
+ Version: 0.119.0
4
4
  Summary: The feldera python client
5
5
  Author-email: Feldera Team <dev@feldera.com>
6
6
  License: MIT
@@ -84,7 +84,7 @@ cd python && python3 -m pytest tests/
84
84
  - This will detect and run all test files that match the pattern `test_*.py` or
85
85
  `*_test.py`.
86
86
  - By default, the tests expect a running Feldera instance at `http://localhost:8080`.
87
- To override the default endpoint, set the `FELDERA_BASE_URL` environment variable.
87
+ To override the default endpoint, set the `FELDERA_HOST` environment variable.
88
88
 
89
89
  To run tests from a specific file:
90
90
 
@@ -61,7 +61,7 @@ cd python && python3 -m pytest tests/
61
61
  - This will detect and run all test files that match the pattern `test_*.py` or
62
62
  `*_test.py`.
63
63
  - By default, the tests expect a running Feldera instance at `http://localhost:8080`.
64
- To override the default endpoint, set the `FELDERA_BASE_URL` environment variable.
64
+ To override the default endpoint, set the `FELDERA_HOST` environment variable.
65
65
 
66
66
  To run tests from a specific file:
67
67
 
@@ -142,6 +142,7 @@ class Pipeline:
142
142
  data: Dict | list,
143
143
  update_format: str = "raw",
144
144
  force: bool = False,
145
+ wait: bool = True,
145
146
  ):
146
147
  """
147
148
  Push this JSON data to the specified table of the pipeline.
@@ -155,6 +156,7 @@ class Pipeline:
155
156
  :param update_format: The update format of the JSON data to be pushed to the pipeline. Must be one of:
156
157
  "raw", "insert_delete". https://docs.feldera.com/formats/json#the-insertdelete-format
157
158
  :param force: `True` to push data even if the pipeline is paused. `False` by default.
159
+ :param wait: If True, blocks until this input has been processed by the pipeline
158
160
 
159
161
  :raises ValueError: If the update format is invalid.
160
162
  :raises FelderaAPIError: If the pipeline is not in a valid state to push data.
@@ -177,6 +179,7 @@ class Pipeline:
177
179
  update_format=update_format,
178
180
  array=array,
179
181
  force=force,
182
+ wait=wait,
180
183
  )
181
184
 
182
185
  def pause_connector(self, table_name: str, connector_name: str):
@@ -372,7 +375,7 @@ method or use `Pipeline.resume()` to resume a paused pipeline."""
372
375
  return
373
376
 
374
377
  self.client.pause_pipeline(
375
- self.name, "Unable to START the pipeline.\n", timeout_s
378
+ self.name, "Unable to START the pipeline.\n", wait=wait, timeout_s=timeout_s
376
379
  )
377
380
  self.__setup_output_listeners()
378
381
  self.resume(timeout_s=timeout_s)
@@ -506,9 +509,11 @@ metrics"""
506
509
  queue.put(_CallbackRunnerInstruction.RanToCompletion)
507
510
 
508
511
  if len(self.views_tx) > 0:
509
- for view_name, queue in self.views_tx.pop().items():
510
- # block until the callback runner has been stopped
511
- queue.join()
512
+ while self.views_tx:
513
+ view = self.views_tx.pop()
514
+ for view_name, queue in view.items():
515
+ # block until the callback runner has been stopped
516
+ queue.join()
512
517
 
513
518
  time.sleep(3)
514
519
  self.client.stop_pipeline(
@@ -45,8 +45,8 @@ class PipelineBuilder:
45
45
  self.udf_toml: str = udf_toml
46
46
  self.compilation_profile: CompilationProfile = compilation_profile
47
47
  self.runtime_config: RuntimeConfig = runtime_config
48
- self.runtime_version: Optional[str] = runtime_version or os.environ.get(
49
- "FELDERA_RUNTIME_VERSION"
48
+ self.runtime_version: Optional[str] = os.environ.get(
49
+ "FELDERA_RUNTIME_VERSION", runtime_version
50
50
  )
51
51
 
52
52
  def create(self) -> Pipeline:
@@ -113,6 +113,7 @@ class PipelineBuilder:
113
113
  udf_toml=self.udf_toml,
114
114
  program_config={
115
115
  "profile": self.compilation_profile.value,
116
+ "runtime_version": self.runtime_version,
116
117
  },
117
118
  runtime_config=self.runtime_config.to_dict(),
118
119
  )
@@ -8,7 +8,7 @@ from typing import Generator, Mapping
8
8
 
9
9
  from feldera.rest.config import Config
10
10
  from feldera.rest.feldera_config import FelderaConfig
11
- from feldera.rest.errors import FelderaTimeoutError
11
+ from feldera.rest.errors import FelderaTimeoutError, FelderaAPIError
12
12
  from feldera.rest.pipeline import Pipeline
13
13
  from feldera.rest._httprequests import HttpRequests
14
14
  from feldera.rest._helpers import client_version
@@ -67,7 +67,7 @@ class FelderaClient:
67
67
  config = self.get_config()
68
68
  version = client_version()
69
69
  if config.version != version:
70
- logging.warn(
70
+ logging.warning(
71
71
  f"Client is on version {version} while server is at "
72
72
  f"{config.version}. There could be incompatibilities."
73
73
  )
@@ -593,7 +593,9 @@ Reason: The pipeline is in a STOPPED state due to the following error:
593
593
  update_format: str = "raw",
594
594
  json_flavor: Optional[str] = None,
595
595
  serialize: bool = True,
596
- ):
596
+ wait: bool = True,
597
+ wait_timeout_s: Optional[float] = None,
598
+ ) -> str:
597
599
  """
598
600
  Insert data into a pipeline
599
601
 
@@ -610,6 +612,11 @@ Reason: The pipeline is in a STOPPED state due to the following error:
610
612
  "debezium_mysql", "snowflake", "kafka_connect_json_converter", "pandas"
611
613
  :param data: The data to insert
612
614
  :param serialize: If True, the data will be serialized to JSON. True by default
615
+ :param wait: If True, blocks until this input has been processed by the pipeline
616
+ :param wait_timeout_s: The timeout in seconds to wait for this set of
617
+ inputs to be processed by the pipeline. None by default
618
+
619
+ :returns: The completion token to this input.
613
620
  """
614
621
 
615
622
  if format not in ["json", "csv"]:
@@ -671,7 +678,7 @@ Reason: The pipeline is in a STOPPED state due to the following error:
671
678
  content_type = "text/csv"
672
679
  data = bytes(str(data), "utf-8")
673
680
 
674
- self.http.post(
681
+ resp = self.http.post(
675
682
  path=f"/pipelines/{pipeline_name}/ingress/{table_name}",
676
683
  params=params,
677
684
  content_type=content_type,
@@ -679,6 +686,74 @@ Reason: The pipeline is in a STOPPED state due to the following error:
679
686
  serialize=serialize,
680
687
  )
681
688
 
689
+ token = resp.get("token")
690
+ if token is None:
691
+ raise FelderaAPIError("response did not contain a completion token", resp)
692
+
693
+ if not wait:
694
+ return token
695
+
696
+ self.wait_for_token(pipeline_name, token, timeout_s=wait_timeout_s)
697
+
698
+ return token
699
+
700
+ def wait_for_token(
701
+ self, pipeline_name: str, token: str, timeout_s: Optional[float] = 600
702
+ ):
703
+ """
704
+ Blocks until all records represented by this completion token have
705
+ been processed.
706
+
707
+ :param pipeline_name: The name of the pipeline
708
+ :param token: The token to check for completion
709
+ :param timeout_s: The amount of time in seconds to wait for the pipeline
710
+ to process these records. Default 600s
711
+ """
712
+
713
+ params = {
714
+ "token": token,
715
+ }
716
+
717
+ start = time.monotonic()
718
+ end = start + timeout_s if timeout_s else None
719
+ initial_backoff = 0.1
720
+ max_backoff = 5
721
+ exponent = 1.2
722
+ retries = 0
723
+
724
+ while True:
725
+ if end:
726
+ if time.monotonic() > end:
727
+ raise FelderaTimeoutError(
728
+ f"timeout error: pipeline '{pipeline_name}' did not"
729
+ f" process records represented by token {token} within"
730
+ f" {timeout_s}"
731
+ )
732
+
733
+ resp = self.http.get(
734
+ path=f"/pipelines/{pipeline_name}/completion_status", params=params
735
+ )
736
+
737
+ status: Optional[str] = resp.get("status")
738
+ if status is None:
739
+ raise FelderaAPIError(
740
+ f"got empty status when checking for completion status for token: {token}",
741
+ resp,
742
+ )
743
+
744
+ if status.lower() == "complete":
745
+ break
746
+
747
+ elapsed = time.monotonic() - start
748
+ logging.debug(
749
+ f"still waiting for inputs represented by {token} to be processed; elapsed: {elapsed}s"
750
+ )
751
+
752
+ retries += 1
753
+ backoff = min(max_backoff, initial_backoff * (exponent**retries))
754
+
755
+ time.sleep(backoff)
756
+
682
757
  def listen_to_pipeline(
683
758
  self,
684
759
  pipeline_name: str,
@@ -16,7 +16,7 @@ class PipelineStatistics:
16
16
 
17
17
  self.global_metrics: GlobalPipelineMetrics = GlobalPipelineMetrics()
18
18
  self.suspend_error: Optional[Any] = None
19
- self.inputs: Mapping[List[InputEndpointStatus()]] = {}
19
+ self.inputs: Mapping[List[InputEndpointStatus]] = {}
20
20
  self.outputs: Mapping[List[OutputEndpointStatus]] = {}
21
21
 
22
22
  @classmethod
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: feldera
3
- Version: 0.117.0
3
+ Version: 0.119.0
4
4
  Summary: The feldera python client
5
5
  Author-email: Feldera Team <dev@feldera.com>
6
6
  License: MIT
@@ -84,7 +84,7 @@ cd python && python3 -m pytest tests/
84
84
  - This will detect and run all test files that match the pattern `test_*.py` or
85
85
  `*_test.py`.
86
86
  - By default, the tests expect a running Feldera instance at `http://localhost:8080`.
87
- To override the default endpoint, set the `FELDERA_BASE_URL` environment variable.
87
+ To override the default endpoint, set the `FELDERA_HOST` environment variable.
88
88
 
89
89
  To run tests from a specific file:
90
90
 
@@ -24,8 +24,8 @@ feldera/rest/feldera_config.py
24
24
  feldera/rest/pipeline.py
25
25
  feldera/rest/sql_table.py
26
26
  feldera/rest/sql_view.py
27
+ tests/test_checkpoint_sync.py
27
28
  tests/test_pipeline_builder.py
28
- tests/test_shared_pipeline0.py
29
- tests/test_shared_pipeline1.py
29
+ tests/test_shared_pipeline.py
30
30
  tests/test_shared_pipeline_stress.py
31
31
  tests/test_udf.py
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
  name = "feldera"
7
7
  readme = "README.md"
8
8
  description = "The feldera python client"
9
- version = "0.117.0"
9
+ version = "0.119.0"
10
10
  license = { text = "MIT" }
11
11
  requires-python = ">=3.10"
12
12
  authors = [
@@ -39,7 +39,9 @@ Issues = "https://github.com/feldera/feldera/issues"
39
39
  dev-dependencies = [
40
40
  "kafka-python-ng==2.2.2",
41
41
  "pytest-timeout>=2.3.1",
42
+ "pytest-xdist>=3.8.0",
42
43
  "pytest>=8.3.5",
43
44
  "sphinx-rtd-theme==2.0.0",
44
45
  "sphinx==7.3.7",
46
+ "simplejson==3.20.1"
45
47
  ]
@@ -3,6 +3,7 @@ from tests import enterprise_only
3
3
  from feldera.runtime_config import RuntimeConfig, Storage
4
4
  from typing import Optional
5
5
  import os
6
+ import sys
6
7
  import time
7
8
  from uuid import uuid4
8
9
  import random
@@ -17,6 +18,7 @@ SECRET_KEY = "miniopasswd"
17
18
 
18
19
 
19
20
  def storage_cfg(
21
+ pipeline_name: str,
20
22
  endpoint: Optional[str] = None,
21
23
  start_from_checkpoint: Optional[str] = None,
22
24
  strict: bool = False,
@@ -27,7 +29,7 @@ def storage_cfg(
27
29
  "name": "file",
28
30
  "config": {
29
31
  "sync": {
30
- "bucket": DEFAULT_BUCKET,
32
+ "bucket": f"{DEFAULT_BUCKET}/{pipeline_name}",
31
33
  "access_key": ACCESS_KEY,
32
34
  "secret_key": SECRET_KEY if not auth_err else SECRET_KEY + "extra",
33
35
  "provider": "Minio",
@@ -53,18 +55,44 @@ class TestCheckpointSync(SharedTestPipeline):
53
55
  ):
54
56
  """
55
57
  CREATE TABLE t0 (c0 INT, c1 VARCHAR);
56
- CREATE MATERIALIZED VIEW v0 AS SELECT c0 FROM t0;
58
+ CREATE MATERIALIZED VIEW v0 AS SELECT * FROM t0;
57
59
  """
58
- storage_config = storage_cfg()
59
60
 
60
- self.set_runtime_config(RuntimeConfig(storage=Storage(config=storage_config)))
61
+ storage_config = storage_cfg(self.pipeline.name)
62
+
63
+ self.pipeline.set_runtime_config(
64
+ RuntimeConfig(storage=Storage(config=storage_config))
65
+ )
61
66
  self.pipeline.start()
62
67
 
63
68
  random.seed(time.time())
64
- data = [{"c0": i, "c1": str(i)} for i in range(1, random.randint(10, 20))]
69
+ total = random.randint(10, 20)
70
+ data = [{"c0": i, "c1": str(i)} for i in range(1, total)]
65
71
  self.pipeline.input_json("t0", data)
66
- self.pipeline.execute("INSERT INTO t0 VALUES (4, 'exists')")
72
+ self.pipeline.execute("INSERT INTO t0 VALUES (21, 'exists')")
73
+
74
+ start = time.time()
75
+ timeout = 5
76
+
77
+ while True:
78
+ processed = self.pipeline.stats().global_metrics.total_processed_records
79
+ if processed == total:
80
+ break
81
+
82
+ if time.time() - start > timeout:
83
+ raise TimeoutError(
84
+ f"timed out while waiting for pipeline to process {total} records"
85
+ )
86
+
87
+ time.sleep(0.1)
88
+
67
89
  got_before = list(self.pipeline.query("SELECT * FROM v0"))
90
+ print(f"{self.pipeline.name}: records: {total}, {got_before}", file=sys.stderr)
91
+
92
+ if len(got_before) != processed:
93
+ raise RuntimeError(
94
+ f"adhoc query returned {len(got_before)} but {processed} records were processed: {got_before}"
95
+ )
68
96
 
69
97
  self.pipeline.checkpoint(wait=True)
70
98
  uuid = self.pipeline.sync_checkpoint(wait=True)
@@ -79,14 +107,22 @@ class TestCheckpointSync(SharedTestPipeline):
79
107
 
80
108
  # Restart pipeline from checkpoint
81
109
  storage_config = storage_cfg(
110
+ pipeline_name=self.pipeline.name,
82
111
  start_from_checkpoint=uuid if from_uuid else "latest",
83
112
  auth_err=auth_err,
84
113
  strict=strict,
85
114
  )
86
- self.set_runtime_config(RuntimeConfig(storage=Storage(config=storage_config)))
115
+ self.pipeline.set_runtime_config(
116
+ RuntimeConfig(storage=Storage(config=storage_config))
117
+ )
87
118
  self.pipeline.start()
88
119
  got_after = list(self.pipeline.query("SELECT * FROM v0"))
89
120
 
121
+ print(
122
+ f"{self.pipeline.name}: after: {len(got_after)}, {got_after}",
123
+ file=sys.stderr,
124
+ )
125
+
90
126
  if expect_empty:
91
127
  got_before = []
92
128
 
@@ -20,7 +20,6 @@ class TestPipeline(SharedTestPipeline):
20
20
  pass
21
21
 
22
22
  def __test_push_to_pipeline(self, data, format, array):
23
- self.pipeline.stop(force=True)
24
23
  self.pipeline.start()
25
24
  TEST_CLIENT.push_to_pipeline(
26
25
  pipeline_name=self.pipeline.name,
@@ -29,8 +28,6 @@ class TestPipeline(SharedTestPipeline):
29
28
  array=array,
30
29
  data=data,
31
30
  )
32
- TEST_CLIENT.pause_pipeline(self.pipeline.name)
33
- TEST_CLIENT.stop_pipeline(self.pipeline.name, force=True)
34
31
 
35
32
  def test_push_to_pipeline_json(self):
36
33
  data = [
@@ -64,8 +61,6 @@ class TestPipeline(SharedTestPipeline):
64
61
  assert stats.get("global_metrics") is not None
65
62
  assert stats.get("inputs") is not None
66
63
  assert stats.get("outputs") is not None
67
- TEST_CLIENT.pause_pipeline(self.pipeline.name)
68
- TEST_CLIENT.stop_pipeline(self.pipeline.name, force=True)
69
64
 
70
65
  def test_adhoc_query_text(self):
71
66
  data = "1\n2\n"
@@ -85,7 +80,6 @@ class TestPipeline(SharedTestPipeline):
85
80
 
86
81
  got = "\n".join(resp)
87
82
  assert got in expected
88
- TEST_CLIENT.stop_pipeline(self.pipeline.name, force=True)
89
83
 
90
84
  def test_adhoc_query_parquet(self):
91
85
  data = "1\n2\n"
@@ -93,7 +87,6 @@ class TestPipeline(SharedTestPipeline):
93
87
  TEST_CLIENT.push_to_pipeline(self.pipeline.name, "tbl", "csv", data)
94
88
  file = self.pipeline.name.split("-")[0]
95
89
  TEST_CLIENT.query_as_parquet(self.pipeline.name, "SELECT * FROM tbl", file)
96
- TEST_CLIENT.stop_pipeline(self.pipeline.name, force=True)
97
90
  path = pathlib.Path(file + ".parquet")
98
91
  assert path.stat().st_size > 0
99
92
  os.remove(path)
@@ -106,7 +99,6 @@ class TestPipeline(SharedTestPipeline):
106
99
  expected = [{"id": 2}, {"id": 1}]
107
100
  got = list(resp)
108
101
  self.assertCountEqual(got, expected)
109
- TEST_CLIENT.stop_pipeline(self.pipeline.name, force=True)
110
102
 
111
103
  def test_local(self):
112
104
  """
@@ -278,7 +270,7 @@ class TestPipeline(SharedTestPipeline):
278
270
 
279
271
  self.pipeline.start()
280
272
  data = [{"id": 2147483647}]
281
- self.pipeline.input_json("tbl", data)
273
+ self.pipeline.input_json("tbl", data, wait=False)
282
274
  while True:
283
275
  status = self.pipeline.status()
284
276
  expected = PipelineStatus.STOPPED
@@ -552,13 +544,10 @@ class TestPipeline(SharedTestPipeline):
552
544
  }
553
545
 
554
546
  resources = Resources(config)
555
- self.set_runtime_config(RuntimeConfig(resources=resources))
547
+ self.pipeline.set_runtime_config(RuntimeConfig(resources=resources))
556
548
  self.pipeline.start()
557
549
  got = TEST_CLIENT.get_pipeline(self.pipeline.name).runtime_config["resources"]
558
- self.pipeline.stop(force=True)
559
550
  assert got == config
560
- self.reset_runtime_config()
561
- self.pipeline.clear_storage()
562
551
 
563
552
 
564
553
  if __name__ == "__main__":
File without changes
File without changes
File without changes
File without changes
File without changes