feldera 0.117.0__tar.gz → 0.119.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of feldera might be problematic; see the advisory details accompanying this release.

Files changed (33)
  1. {feldera-0.117.0 → feldera-0.119.0}/PKG-INFO +2 -2
  2. {feldera-0.117.0 → feldera-0.119.0}/README.md +1 -1
  3. {feldera-0.117.0 → feldera-0.119.0}/feldera/pipeline.py +9 -4
  4. {feldera-0.117.0 → feldera-0.119.0}/feldera/pipeline_builder.py +3 -2
  5. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/feldera_client.py +79 -4
  6. {feldera-0.117.0 → feldera-0.119.0}/feldera/stats.py +1 -1
  7. {feldera-0.117.0 → feldera-0.119.0}/feldera.egg-info/PKG-INFO +2 -2
  8. {feldera-0.117.0 → feldera-0.119.0}/feldera.egg-info/SOURCES.txt +2 -2
  9. {feldera-0.117.0 → feldera-0.119.0}/pyproject.toml +3 -1
  10. feldera-0.117.0/tests/test_shared_pipeline1.py → feldera-0.119.0/tests/test_checkpoint_sync.py +43 -7
  11. feldera-0.117.0/tests/test_shared_pipeline0.py → feldera-0.119.0/tests/test_shared_pipeline.py +2 -13
  12. {feldera-0.117.0 → feldera-0.119.0}/feldera/__init__.py +0 -0
  13. {feldera-0.117.0 → feldera-0.119.0}/feldera/_callback_runner.py +0 -0
  14. {feldera-0.117.0 → feldera-0.119.0}/feldera/_helpers.py +0 -0
  15. {feldera-0.117.0 → feldera-0.119.0}/feldera/enums.py +0 -0
  16. {feldera-0.117.0 → feldera-0.119.0}/feldera/output_handler.py +0 -0
  17. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/__init__.py +0 -0
  18. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/_helpers.py +0 -0
  19. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/_httprequests.py +0 -0
  20. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/config.py +0 -0
  21. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/errors.py +0 -0
  22. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/feldera_config.py +0 -0
  23. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/pipeline.py +0 -0
  24. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/sql_table.py +0 -0
  25. {feldera-0.117.0 → feldera-0.119.0}/feldera/rest/sql_view.py +0 -0
  26. {feldera-0.117.0 → feldera-0.119.0}/feldera/runtime_config.py +0 -0
  27. {feldera-0.117.0 → feldera-0.119.0}/feldera.egg-info/dependency_links.txt +0 -0
  28. {feldera-0.117.0 → feldera-0.119.0}/feldera.egg-info/requires.txt +0 -0
  29. {feldera-0.117.0 → feldera-0.119.0}/feldera.egg-info/top_level.txt +0 -0
  30. {feldera-0.117.0 → feldera-0.119.0}/setup.cfg +0 -0
  31. {feldera-0.117.0 → feldera-0.119.0}/tests/test_pipeline_builder.py +0 -0
  32. {feldera-0.117.0 → feldera-0.119.0}/tests/test_shared_pipeline_stress.py +0 -0
  33. {feldera-0.117.0 → feldera-0.119.0}/tests/test_udf.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: feldera
3
- Version: 0.117.0
3
+ Version: 0.119.0
4
4
  Summary: The feldera python client
5
5
  Author-email: Feldera Team <dev@feldera.com>
6
6
  License: MIT
@@ -84,7 +84,7 @@ cd python && python3 -m pytest tests/
84
84
  - This will detect and run all test files that match the pattern `test_*.py` or
85
85
  `*_test.py`.
86
86
  - By default, the tests expect a running Feldera instance at `http://localhost:8080`.
87
- To override the default endpoint, set the `FELDERA_BASE_URL` environment variable.
87
+ To override the default endpoint, set the `FELDERA_HOST` environment variable.
88
88
 
89
89
  To run tests from a specific file:
90
90
 
@@ -61,7 +61,7 @@ cd python && python3 -m pytest tests/
61
61
  - This will detect and run all test files that match the pattern `test_*.py` or
62
62
  `*_test.py`.
63
63
  - By default, the tests expect a running Feldera instance at `http://localhost:8080`.
64
- To override the default endpoint, set the `FELDERA_BASE_URL` environment variable.
64
+ To override the default endpoint, set the `FELDERA_HOST` environment variable.
65
65
 
66
66
  To run tests from a specific file:
67
67
 
@@ -142,6 +142,7 @@ class Pipeline:
142
142
  data: Dict | list,
143
143
  update_format: str = "raw",
144
144
  force: bool = False,
145
+ wait: bool = True,
145
146
  ):
146
147
  """
147
148
  Push this JSON data to the specified table of the pipeline.
@@ -155,6 +156,7 @@ class Pipeline:
155
156
  :param update_format: The update format of the JSON data to be pushed to the pipeline. Must be one of:
156
157
  "raw", "insert_delete". https://docs.feldera.com/formats/json#the-insertdelete-format
157
158
  :param force: `True` to push data even if the pipeline is paused. `False` by default.
159
+ :param wait: If True, blocks until this input has been processed by the pipeline
158
160
 
159
161
  :raises ValueError: If the update format is invalid.
160
162
  :raises FelderaAPIError: If the pipeline is not in a valid state to push data.
@@ -177,6 +179,7 @@ class Pipeline:
177
179
  update_format=update_format,
178
180
  array=array,
179
181
  force=force,
182
+ wait=wait,
180
183
  )
181
184
 
182
185
  def pause_connector(self, table_name: str, connector_name: str):
@@ -372,7 +375,7 @@ method or use `Pipeline.resume()` to resume a paused pipeline."""
372
375
  return
373
376
 
374
377
  self.client.pause_pipeline(
375
- self.name, "Unable to START the pipeline.\n", timeout_s
378
+ self.name, "Unable to START the pipeline.\n", wait=wait, timeout_s=timeout_s
376
379
  )
377
380
  self.__setup_output_listeners()
378
381
  self.resume(timeout_s=timeout_s)
@@ -506,9 +509,11 @@ metrics"""
506
509
  queue.put(_CallbackRunnerInstruction.RanToCompletion)
507
510
 
508
511
  if len(self.views_tx) > 0:
509
- for view_name, queue in self.views_tx.pop().items():
510
- # block until the callback runner has been stopped
511
- queue.join()
512
+ while self.views_tx:
513
+ view = self.views_tx.pop()
514
+ for view_name, queue in view.items():
515
+ # block until the callback runner has been stopped
516
+ queue.join()
512
517
 
513
518
  time.sleep(3)
514
519
  self.client.stop_pipeline(
@@ -45,8 +45,8 @@ class PipelineBuilder:
45
45
  self.udf_toml: str = udf_toml
46
46
  self.compilation_profile: CompilationProfile = compilation_profile
47
47
  self.runtime_config: RuntimeConfig = runtime_config
48
- self.runtime_version: Optional[str] = runtime_version or os.environ.get(
49
- "FELDERA_RUNTIME_VERSION"
48
+ self.runtime_version: Optional[str] = os.environ.get(
49
+ "FELDERA_RUNTIME_VERSION", runtime_version
50
50
  )
51
51
 
52
52
  def create(self) -> Pipeline:
@@ -113,6 +113,7 @@ class PipelineBuilder:
113
113
  udf_toml=self.udf_toml,
114
114
  program_config={
115
115
  "profile": self.compilation_profile.value,
116
+ "runtime_version": self.runtime_version,
116
117
  },
117
118
  runtime_config=self.runtime_config.to_dict(),
118
119
  )
@@ -8,7 +8,7 @@ from typing import Generator, Mapping
8
8
 
9
9
  from feldera.rest.config import Config
10
10
  from feldera.rest.feldera_config import FelderaConfig
11
- from feldera.rest.errors import FelderaTimeoutError
11
+ from feldera.rest.errors import FelderaTimeoutError, FelderaAPIError
12
12
  from feldera.rest.pipeline import Pipeline
13
13
  from feldera.rest._httprequests import HttpRequests
14
14
  from feldera.rest._helpers import client_version
@@ -67,7 +67,7 @@ class FelderaClient:
67
67
  config = self.get_config()
68
68
  version = client_version()
69
69
  if config.version != version:
70
- logging.warn(
70
+ logging.warning(
71
71
  f"Client is on version {version} while server is at "
72
72
  f"{config.version}. There could be incompatibilities."
73
73
  )
@@ -593,7 +593,9 @@ Reason: The pipeline is in a STOPPED state due to the following error:
593
593
  update_format: str = "raw",
594
594
  json_flavor: Optional[str] = None,
595
595
  serialize: bool = True,
596
- ):
596
+ wait: bool = True,
597
+ wait_timeout_s: Optional[float] = None,
598
+ ) -> str:
597
599
  """
598
600
  Insert data into a pipeline
599
601
 
@@ -610,6 +612,11 @@ Reason: The pipeline is in a STOPPED state due to the following error:
610
612
  "debezium_mysql", "snowflake", "kafka_connect_json_converter", "pandas"
611
613
  :param data: The data to insert
612
614
  :param serialize: If True, the data will be serialized to JSON. True by default
615
+ :param wait: If True, blocks until this input has been processed by the pipeline
616
+ :param wait_timeout_s: The timeout in seconds to wait for this set of
617
+ inputs to be processed by the pipeline. None by default
618
+
619
+ :returns: The completion token to this input.
613
620
  """
614
621
 
615
622
  if format not in ["json", "csv"]:
@@ -671,7 +678,7 @@ Reason: The pipeline is in a STOPPED state due to the following error:
671
678
  content_type = "text/csv"
672
679
  data = bytes(str(data), "utf-8")
673
680
 
674
- self.http.post(
681
+ resp = self.http.post(
675
682
  path=f"/pipelines/{pipeline_name}/ingress/{table_name}",
676
683
  params=params,
677
684
  content_type=content_type,
@@ -679,6 +686,74 @@ Reason: The pipeline is in a STOPPED state due to the following error:
679
686
  serialize=serialize,
680
687
  )
681
688
 
689
+ token = resp.get("token")
690
+ if token is None:
691
+ raise FelderaAPIError("response did not contain a completion token", resp)
692
+
693
+ if not wait:
694
+ return token
695
+
696
+ self.wait_for_token(pipeline_name, token, timeout_s=wait_timeout_s)
697
+
698
+ return token
699
+
700
+ def wait_for_token(
701
+ self, pipeline_name: str, token: str, timeout_s: Optional[float] = 600
702
+ ):
703
+ """
704
+ Blocks until all records represented by this completion token have
705
+ been processed.
706
+
707
+ :param pipeline_name: The name of the pipeline
708
+ :param token: The token to check for completion
709
+ :param timeout_s: The amount of time in seconds to wait for the pipeline
710
+ to process these records. Default 600s
711
+ """
712
+
713
+ params = {
714
+ "token": token,
715
+ }
716
+
717
+ start = time.monotonic()
718
+ end = start + timeout_s if timeout_s else None
719
+ initial_backoff = 0.1
720
+ max_backoff = 5
721
+ exponent = 1.2
722
+ retries = 0
723
+
724
+ while True:
725
+ if end:
726
+ if time.monotonic() > end:
727
+ raise FelderaTimeoutError(
728
+ f"timeout error: pipeline '{pipeline_name}' did not"
729
+ f" process records represented by token {token} within"
730
+ f" {timeout_s}"
731
+ )
732
+
733
+ resp = self.http.get(
734
+ path=f"/pipelines/{pipeline_name}/completion_status", params=params
735
+ )
736
+
737
+ status: Optional[str] = resp.get("status")
738
+ if status is None:
739
+ raise FelderaAPIError(
740
+ f"got empty status when checking for completion status for token: {token}",
741
+ resp,
742
+ )
743
+
744
+ if status.lower() == "complete":
745
+ break
746
+
747
+ elapsed = time.monotonic() - start
748
+ logging.debug(
749
+ f"still waiting for inputs represented by {token} to be processed; elapsed: {elapsed}s"
750
+ )
751
+
752
+ retries += 1
753
+ backoff = min(max_backoff, initial_backoff * (exponent**retries))
754
+
755
+ time.sleep(backoff)
756
+
682
757
  def listen_to_pipeline(
683
758
  self,
684
759
  pipeline_name: str,
@@ -16,7 +16,7 @@ class PipelineStatistics:
16
16
 
17
17
  self.global_metrics: GlobalPipelineMetrics = GlobalPipelineMetrics()
18
18
  self.suspend_error: Optional[Any] = None
19
- self.inputs: Mapping[List[InputEndpointStatus()]] = {}
19
+ self.inputs: Mapping[List[InputEndpointStatus]] = {}
20
20
  self.outputs: Mapping[List[OutputEndpointStatus]] = {}
21
21
 
22
22
  @classmethod
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: feldera
3
- Version: 0.117.0
3
+ Version: 0.119.0
4
4
  Summary: The feldera python client
5
5
  Author-email: Feldera Team <dev@feldera.com>
6
6
  License: MIT
@@ -84,7 +84,7 @@ cd python && python3 -m pytest tests/
84
84
  - This will detect and run all test files that match the pattern `test_*.py` or
85
85
  `*_test.py`.
86
86
  - By default, the tests expect a running Feldera instance at `http://localhost:8080`.
87
- To override the default endpoint, set the `FELDERA_BASE_URL` environment variable.
87
+ To override the default endpoint, set the `FELDERA_HOST` environment variable.
88
88
 
89
89
  To run tests from a specific file:
90
90
 
@@ -24,8 +24,8 @@ feldera/rest/feldera_config.py
24
24
  feldera/rest/pipeline.py
25
25
  feldera/rest/sql_table.py
26
26
  feldera/rest/sql_view.py
27
+ tests/test_checkpoint_sync.py
27
28
  tests/test_pipeline_builder.py
28
- tests/test_shared_pipeline0.py
29
- tests/test_shared_pipeline1.py
29
+ tests/test_shared_pipeline.py
30
30
  tests/test_shared_pipeline_stress.py
31
31
  tests/test_udf.py
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
  name = "feldera"
7
7
  readme = "README.md"
8
8
  description = "The feldera python client"
9
- version = "0.117.0"
9
+ version = "0.119.0"
10
10
  license = { text = "MIT" }
11
11
  requires-python = ">=3.10"
12
12
  authors = [
@@ -39,7 +39,9 @@ Issues = "https://github.com/feldera/feldera/issues"
39
39
  dev-dependencies = [
40
40
  "kafka-python-ng==2.2.2",
41
41
  "pytest-timeout>=2.3.1",
42
+ "pytest-xdist>=3.8.0",
42
43
  "pytest>=8.3.5",
43
44
  "sphinx-rtd-theme==2.0.0",
44
45
  "sphinx==7.3.7",
46
+ "simplejson==3.20.1"
45
47
  ]
@@ -3,6 +3,7 @@ from tests import enterprise_only
3
3
  from feldera.runtime_config import RuntimeConfig, Storage
4
4
  from typing import Optional
5
5
  import os
6
+ import sys
6
7
  import time
7
8
  from uuid import uuid4
8
9
  import random
@@ -17,6 +18,7 @@ SECRET_KEY = "miniopasswd"
17
18
 
18
19
 
19
20
  def storage_cfg(
21
+ pipeline_name: str,
20
22
  endpoint: Optional[str] = None,
21
23
  start_from_checkpoint: Optional[str] = None,
22
24
  strict: bool = False,
@@ -27,7 +29,7 @@ def storage_cfg(
27
29
  "name": "file",
28
30
  "config": {
29
31
  "sync": {
30
- "bucket": DEFAULT_BUCKET,
32
+ "bucket": f"{DEFAULT_BUCKET}/{pipeline_name}",
31
33
  "access_key": ACCESS_KEY,
32
34
  "secret_key": SECRET_KEY if not auth_err else SECRET_KEY + "extra",
33
35
  "provider": "Minio",
@@ -53,18 +55,44 @@ class TestCheckpointSync(SharedTestPipeline):
53
55
  ):
54
56
  """
55
57
  CREATE TABLE t0 (c0 INT, c1 VARCHAR);
56
- CREATE MATERIALIZED VIEW v0 AS SELECT c0 FROM t0;
58
+ CREATE MATERIALIZED VIEW v0 AS SELECT * FROM t0;
57
59
  """
58
- storage_config = storage_cfg()
59
60
 
60
- self.set_runtime_config(RuntimeConfig(storage=Storage(config=storage_config)))
61
+ storage_config = storage_cfg(self.pipeline.name)
62
+
63
+ self.pipeline.set_runtime_config(
64
+ RuntimeConfig(storage=Storage(config=storage_config))
65
+ )
61
66
  self.pipeline.start()
62
67
 
63
68
  random.seed(time.time())
64
- data = [{"c0": i, "c1": str(i)} for i in range(1, random.randint(10, 20))]
69
+ total = random.randint(10, 20)
70
+ data = [{"c0": i, "c1": str(i)} for i in range(1, total)]
65
71
  self.pipeline.input_json("t0", data)
66
- self.pipeline.execute("INSERT INTO t0 VALUES (4, 'exists')")
72
+ self.pipeline.execute("INSERT INTO t0 VALUES (21, 'exists')")
73
+
74
+ start = time.time()
75
+ timeout = 5
76
+
77
+ while True:
78
+ processed = self.pipeline.stats().global_metrics.total_processed_records
79
+ if processed == total:
80
+ break
81
+
82
+ if time.time() - start > timeout:
83
+ raise TimeoutError(
84
+ f"timed out while waiting for pipeline to process {total} records"
85
+ )
86
+
87
+ time.sleep(0.1)
88
+
67
89
  got_before = list(self.pipeline.query("SELECT * FROM v0"))
90
+ print(f"{self.pipeline.name}: records: {total}, {got_before}", file=sys.stderr)
91
+
92
+ if len(got_before) != processed:
93
+ raise RuntimeError(
94
+ f"adhoc query returned {len(got_before)} but {processed} records were processed: {got_before}"
95
+ )
68
96
 
69
97
  self.pipeline.checkpoint(wait=True)
70
98
  uuid = self.pipeline.sync_checkpoint(wait=True)
@@ -79,14 +107,22 @@ class TestCheckpointSync(SharedTestPipeline):
79
107
 
80
108
  # Restart pipeline from checkpoint
81
109
  storage_config = storage_cfg(
110
+ pipeline_name=self.pipeline.name,
82
111
  start_from_checkpoint=uuid if from_uuid else "latest",
83
112
  auth_err=auth_err,
84
113
  strict=strict,
85
114
  )
86
- self.set_runtime_config(RuntimeConfig(storage=Storage(config=storage_config)))
115
+ self.pipeline.set_runtime_config(
116
+ RuntimeConfig(storage=Storage(config=storage_config))
117
+ )
87
118
  self.pipeline.start()
88
119
  got_after = list(self.pipeline.query("SELECT * FROM v0"))
89
120
 
121
+ print(
122
+ f"{self.pipeline.name}: after: {len(got_after)}, {got_after}",
123
+ file=sys.stderr,
124
+ )
125
+
90
126
  if expect_empty:
91
127
  got_before = []
92
128
 
@@ -20,7 +20,6 @@ class TestPipeline(SharedTestPipeline):
20
20
  pass
21
21
 
22
22
  def __test_push_to_pipeline(self, data, format, array):
23
- self.pipeline.stop(force=True)
24
23
  self.pipeline.start()
25
24
  TEST_CLIENT.push_to_pipeline(
26
25
  pipeline_name=self.pipeline.name,
@@ -29,8 +28,6 @@ class TestPipeline(SharedTestPipeline):
29
28
  array=array,
30
29
  data=data,
31
30
  )
32
- TEST_CLIENT.pause_pipeline(self.pipeline.name)
33
- TEST_CLIENT.stop_pipeline(self.pipeline.name, force=True)
34
31
 
35
32
  def test_push_to_pipeline_json(self):
36
33
  data = [
@@ -64,8 +61,6 @@ class TestPipeline(SharedTestPipeline):
64
61
  assert stats.get("global_metrics") is not None
65
62
  assert stats.get("inputs") is not None
66
63
  assert stats.get("outputs") is not None
67
- TEST_CLIENT.pause_pipeline(self.pipeline.name)
68
- TEST_CLIENT.stop_pipeline(self.pipeline.name, force=True)
69
64
 
70
65
  def test_adhoc_query_text(self):
71
66
  data = "1\n2\n"
@@ -85,7 +80,6 @@ class TestPipeline(SharedTestPipeline):
85
80
 
86
81
  got = "\n".join(resp)
87
82
  assert got in expected
88
- TEST_CLIENT.stop_pipeline(self.pipeline.name, force=True)
89
83
 
90
84
  def test_adhoc_query_parquet(self):
91
85
  data = "1\n2\n"
@@ -93,7 +87,6 @@ class TestPipeline(SharedTestPipeline):
93
87
  TEST_CLIENT.push_to_pipeline(self.pipeline.name, "tbl", "csv", data)
94
88
  file = self.pipeline.name.split("-")[0]
95
89
  TEST_CLIENT.query_as_parquet(self.pipeline.name, "SELECT * FROM tbl", file)
96
- TEST_CLIENT.stop_pipeline(self.pipeline.name, force=True)
97
90
  path = pathlib.Path(file + ".parquet")
98
91
  assert path.stat().st_size > 0
99
92
  os.remove(path)
@@ -106,7 +99,6 @@ class TestPipeline(SharedTestPipeline):
106
99
  expected = [{"id": 2}, {"id": 1}]
107
100
  got = list(resp)
108
101
  self.assertCountEqual(got, expected)
109
- TEST_CLIENT.stop_pipeline(self.pipeline.name, force=True)
110
102
 
111
103
  def test_local(self):
112
104
  """
@@ -278,7 +270,7 @@ class TestPipeline(SharedTestPipeline):
278
270
 
279
271
  self.pipeline.start()
280
272
  data = [{"id": 2147483647}]
281
- self.pipeline.input_json("tbl", data)
273
+ self.pipeline.input_json("tbl", data, wait=False)
282
274
  while True:
283
275
  status = self.pipeline.status()
284
276
  expected = PipelineStatus.STOPPED
@@ -552,13 +544,10 @@ class TestPipeline(SharedTestPipeline):
552
544
  }
553
545
 
554
546
  resources = Resources(config)
555
- self.set_runtime_config(RuntimeConfig(resources=resources))
547
+ self.pipeline.set_runtime_config(RuntimeConfig(resources=resources))
556
548
  self.pipeline.start()
557
549
  got = TEST_CLIENT.get_pipeline(self.pipeline.name).runtime_config["resources"]
558
- self.pipeline.stop(force=True)
559
550
  assert got == config
560
- self.reset_runtime_config()
561
- self.pipeline.clear_storage()
562
551
 
563
552
 
564
553
  if __name__ == "__main__":
File without changes
File without changes
File without changes
File without changes
File without changes