feldera-0.168.0.tar.gz → feldera-0.170.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of feldera might be problematic. More details are available via the advisory link on the original diff page.

Files changed (32)
  1. {feldera-0.168.0 → feldera-0.170.0}/PKG-INFO +1 -1
  2. feldera-0.170.0/feldera/_callback_runner.py +95 -0
  3. {feldera-0.168.0 → feldera-0.170.0}/feldera/_helpers.py +7 -1
  4. {feldera-0.168.0 → feldera-0.170.0}/feldera/enums.py +12 -0
  5. {feldera-0.168.0 → feldera-0.170.0}/feldera/output_handler.py +4 -0
  6. {feldera-0.168.0 → feldera-0.170.0}/feldera/pipeline.py +44 -1
  7. {feldera-0.168.0 → feldera-0.170.0}/feldera/rest/feldera_client.py +62 -18
  8. {feldera-0.168.0 → feldera-0.170.0}/feldera.egg-info/PKG-INFO +1 -1
  9. {feldera-0.168.0 → feldera-0.170.0}/pyproject.toml +1 -1
  10. feldera-0.168.0/feldera/_callback_runner.py +0 -69
  11. {feldera-0.168.0 → feldera-0.170.0}/README.md +0 -0
  12. {feldera-0.168.0 → feldera-0.170.0}/feldera/__init__.py +0 -0
  13. {feldera-0.168.0 → feldera-0.170.0}/feldera/pipeline_builder.py +0 -0
  14. {feldera-0.168.0 → feldera-0.170.0}/feldera/rest/__init__.py +0 -0
  15. {feldera-0.168.0 → feldera-0.170.0}/feldera/rest/_helpers.py +0 -0
  16. {feldera-0.168.0 → feldera-0.170.0}/feldera/rest/_httprequests.py +0 -0
  17. {feldera-0.168.0 → feldera-0.170.0}/feldera/rest/config.py +0 -0
  18. {feldera-0.168.0 → feldera-0.170.0}/feldera/rest/errors.py +0 -0
  19. {feldera-0.168.0 → feldera-0.170.0}/feldera/rest/feldera_config.py +0 -0
  20. {feldera-0.168.0 → feldera-0.170.0}/feldera/rest/pipeline.py +0 -0
  21. {feldera-0.168.0 → feldera-0.170.0}/feldera/rest/sql_table.py +0 -0
  22. {feldera-0.168.0 → feldera-0.170.0}/feldera/rest/sql_view.py +0 -0
  23. {feldera-0.168.0 → feldera-0.170.0}/feldera/runtime_config.py +0 -0
  24. {feldera-0.168.0 → feldera-0.170.0}/feldera/stats.py +0 -0
  25. {feldera-0.168.0 → feldera-0.170.0}/feldera/tests/test_datafusionize.py +0 -0
  26. {feldera-0.168.0 → feldera-0.170.0}/feldera/testutils.py +0 -0
  27. {feldera-0.168.0 → feldera-0.170.0}/feldera/testutils_oidc.py +0 -0
  28. {feldera-0.168.0 → feldera-0.170.0}/feldera.egg-info/SOURCES.txt +0 -0
  29. {feldera-0.168.0 → feldera-0.170.0}/feldera.egg-info/dependency_links.txt +0 -0
  30. {feldera-0.168.0 → feldera-0.170.0}/feldera.egg-info/requires.txt +0 -0
  31. {feldera-0.168.0 → feldera-0.170.0}/feldera.egg-info/top_level.txt +0 -0
  32. {feldera-0.168.0 → feldera-0.170.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: feldera
3
- Version: 0.168.0
3
+ Version: 0.170.0
4
4
  Summary: The feldera python client
5
5
  Author-email: Feldera Team <dev@feldera.com>
6
6
  License: MIT
@@ -0,0 +1,95 @@
1
+ from threading import Thread, Event
2
+ from typing import Callable, List, Optional, Mapping, Any
3
+
4
+ import pandas as pd
5
+ from feldera import FelderaClient
6
+ from feldera._helpers import dataframe_from_response
7
+ from feldera.enums import PipelineFieldSelector
8
+ from feldera.rest.sql_table import SQLTable
9
+ from feldera.rest.sql_view import SQLView
10
+ from feldera.rest.pipeline import Pipeline
11
+
12
+
13
+ class CallbackRunner(Thread):
14
+ def __init__(
15
+ self,
16
+ client: FelderaClient,
17
+ pipeline_name: str,
18
+ view_name: str,
19
+ callback: Callable[[pd.DataFrame, int], None],
20
+ exception_callback: Callable[[BaseException], None],
21
+ event: Event,
22
+ ):
23
+ """
24
+ :param client: The :class:`.FelderaClient` to use.
25
+ :param pipeline_name: The name of the current pipeline.
26
+ :param view_name: The name of the view we are listening to.
27
+ :param callback: The callback function to call on the data we receive.
28
+ :param exception_callback: The callback function to call when an exception occurs.
29
+ :param event: The event to wait for before starting the callback runner.
30
+ """
31
+
32
+ super().__init__()
33
+ self.daemon = True
34
+ self.client: FelderaClient = client
35
+ self.pipeline_name: str = pipeline_name
36
+ self.view_name: str = view_name
37
+ self.callback: Callable[[pd.DataFrame, int], None] = callback
38
+ self.exception_callback: Callable[[BaseException], None] = exception_callback
39
+ self.event: Event = event
40
+
41
+ self.pipeline: Pipeline = self.client.get_pipeline(
42
+ self.pipeline_name, PipelineFieldSelector.ALL
43
+ )
44
+
45
+ view_schema = None
46
+
47
+ schemas: List[SQLTable | SQLView] = self.pipeline.tables + self.pipeline.views
48
+ for schema in schemas:
49
+ if schema.name == self.view_name:
50
+ view_schema = schema
51
+ break
52
+
53
+ if view_schema is None:
54
+ raise ValueError(
55
+ f"Table or View {self.view_name} not found in the pipeline schema."
56
+ )
57
+
58
+ self.schema: SQLTable | SQLView = view_schema
59
+
60
+ def to_callback(self, chunk: Mapping[str, Any]):
61
+ data: Optional[list[Mapping[str, Any]]] = chunk.get("json_data")
62
+ seq_no: Optional[int] = chunk.get("sequence_number")
63
+ if data is not None and seq_no is not None:
64
+ self.callback(dataframe_from_response([data], self.schema.fields), seq_no)
65
+
66
+ def run(self):
67
+ """
68
+ The main loop of the thread. Listens for data and calls the callback function on each chunk of data received.
69
+
70
+ :meta private:
71
+ """
72
+
73
+ try:
74
+ gen_obj = self.client.listen_to_pipeline(
75
+ self.pipeline_name,
76
+ self.view_name,
77
+ format="json",
78
+ case_sensitive=self.schema.case_sensitive,
79
+ )
80
+
81
+ iterator = gen_obj()
82
+
83
+ # Trigger the HTTP call
84
+ chunk = next(iterator)
85
+
86
+ # Unblock the main thread
87
+ self.event.set()
88
+
89
+ self.to_callback(chunk)
90
+
91
+ for chunk in iterator:
92
+ self.to_callback(chunk)
93
+
94
+ except BaseException as e:
95
+ self.exception_callback(e)
@@ -2,6 +2,7 @@ import uuid
2
2
 
3
3
  import pandas as pd
4
4
  from decimal import Decimal
5
+ from typing import Mapping, Any
5
6
 
6
7
 
7
8
  def sql_type_to_pandas_type(sql_type: str):
@@ -60,9 +61,14 @@ def ensure_dataframe_has_columns(df: pd.DataFrame):
60
61
  )
61
62
 
62
63
 
63
- def dataframe_from_response(buffer: list[list[dict]], fields: list[dict]):
64
+ def dataframe_from_response(
65
+ buffer: list[list[Mapping[str, Any]]], fields: list[Mapping[str, Any]]
66
+ ):
64
67
  """
65
68
  Converts the response from Feldera to a pandas DataFrame.
69
+
70
+ :param buffer: A buffer of a list of JSON formatted output of the view you are listening to.
71
+ :param fields: The schema (list of fields) of the view you are listening to.
66
72
  """
67
73
 
68
74
  pd_schema = {}
@@ -352,3 +352,15 @@ class BootstrapPolicy(Enum):
352
352
  AWAIT_APPROVAL = "await_approval"
353
353
  ALLOW = "allow"
354
354
  REJECT = "reject"
355
+
356
+
357
+ class CompletionTokenStatus(Enum):
358
+ COMPLETE = "complete"
359
+ """
360
+ Feldera has completed processing all inputs represented by this token.
361
+ """
362
+
363
+ IN_PROGRESS = "inprogress"
364
+ """
365
+ Feldera is still processing the inputs represented by this token.
366
+ """
@@ -1,6 +1,7 @@
1
1
  import pandas as pd
2
2
 
3
3
  from typing import Optional
4
+ from threading import Event
4
5
 
5
6
  from feldera import FelderaClient
6
7
  from feldera._callback_runner import CallbackRunner
@@ -23,6 +24,7 @@ class OutputHandler:
23
24
  self.view_name: str = view_name
24
25
  self.buffer: list[pd.DataFrame] = []
25
26
  self.exception: Optional[BaseException] = None
27
+ self.event = Event()
26
28
 
27
29
  # the callback that is passed to the `CallbackRunner`
28
30
  def callback(df: pd.DataFrame, _: int):
@@ -39,6 +41,7 @@ class OutputHandler:
39
41
  self.view_name,
40
42
  callback,
41
43
  exception_callback,
44
+ self.event,
42
45
  )
43
46
 
44
47
  def start(self):
@@ -47,6 +50,7 @@ class OutputHandler:
47
50
  """
48
51
 
49
52
  self.handler.start()
53
+ _ = self.event.wait()
50
54
 
51
55
  def to_pandas(self, clear_buffer: bool = True):
52
56
  """
@@ -7,11 +7,13 @@ import pandas
7
7
  from uuid import UUID
8
8
 
9
9
  from typing import List, Dict, Callable, Optional, Generator, Mapping, Any
10
+ from threading import Event
10
11
  from collections import deque
11
12
 
12
13
  from feldera.rest.errors import FelderaAPIError
13
14
  from feldera.enums import (
14
15
  BootstrapPolicy,
16
+ CompletionTokenStatus,
15
17
  PipelineFieldSelector,
16
18
  PipelineStatus,
17
19
  ProgramStatus,
@@ -294,10 +296,12 @@ class Pipeline:
294
296
  if self.status() not in [PipelineStatus.RUNNING, PipelineStatus.PAUSED]:
295
297
  raise RuntimeError("Pipeline must be running or paused to listen to output")
296
298
 
299
+ event = Event()
297
300
  handler = CallbackRunner(
298
- self.client, self.name, view_name, callback, lambda exception: None
301
+ self.client, self.name, view_name, callback, lambda exception: None, event
299
302
  )
300
303
  handler.start()
304
+ event.wait()
301
305
 
302
306
  def wait_for_completion(
303
307
  self, force_stop: bool = False, timeout_s: float | None = None
@@ -696,6 +700,17 @@ metrics"""
696
700
  err.message = f"Pipeline with name {name} not found"
697
701
  raise err
698
702
 
703
+ @staticmethod
704
+ def all(client: FelderaClient) -> List["Pipeline"]:
705
+ """
706
+ Get all pipelines.
707
+
708
+ :param client: The FelderaClient instance.
709
+ :return: A list of Pipeline objects.
710
+ """
711
+
712
+ return [Pipeline._from_inner(p, client) for p in client.pipelines()]
713
+
699
714
  def checkpoint(self, wait: bool = False, timeout_s: Optional[float] = None) -> int:
700
715
  """
701
716
  Checkpoints this pipeline.
@@ -1377,3 +1392,31 @@ pipeline '{self.name}' to sync checkpoint '{uuid}'"""
1377
1392
  print(f"Support bundle written to {path}")
1378
1393
 
1379
1394
  return support_bundle_bytes
1395
+
1396
+ def generate_completion_token(self, table_name: str, connector_name: str) -> str:
1397
+ """
1398
+ Returns a completion token that can be passed to :meth:`.Pipeline.completion_token_status` to
1399
+ check whether the pipeline has finished processing all inputs received from the connector before
1400
+ the token was generated.
1401
+ """
1402
+
1403
+ return self.client.generate_completion_token(
1404
+ self.name, table_name, connector_name
1405
+ )
1406
+
1407
+ def completion_token_status(self, token: str) -> CompletionTokenStatus:
1408
+ """
1409
+ Returns the status of the completion token.
1410
+ """
1411
+
1412
+ if self.client.completion_token_processed(self.name, token):
1413
+ return CompletionTokenStatus.COMPLETE
1414
+ else:
1415
+ return CompletionTokenStatus.IN_PROGRESS
1416
+
1417
+ def wait_for_token(self, token: str):
1418
+ """
1419
+ Blocks until the pipeline processes all inputs represented by the completion token.
1420
+ """
1421
+
1422
+ self.client.wait_for_token(self.name, token)
@@ -126,13 +126,15 @@ class FelderaClient:
126
126
 
127
127
  return runtime_config
128
128
 
129
- def pipelines(self) -> list[Pipeline]:
129
+ def pipelines(
130
+ self, selector: PipelineFieldSelector = PipelineFieldSelector.STATUS
131
+ ) -> list[Pipeline]:
130
132
  """
131
133
  Get all pipelines
132
134
  """
133
135
 
134
136
  resp = self.http.get(
135
- path="/pipelines",
137
+ path=f"/pipelines?selector={selector.value}",
136
138
  )
137
139
 
138
140
  return [Pipeline.from_dict(pipeline) for pipeline in resp]
@@ -883,6 +885,35 @@ Reason: The pipeline is in a STOPPED state due to the following error:
883
885
 
884
886
  return token
885
887
 
888
+ def completion_token_processed(self, pipeline_name: str, token: str) -> bool:
889
+ """
890
+ Check whether the pipeline has finished processing all inputs received from the connector before
891
+ the token was generated.
892
+
893
+ :param pipeline_name: The name of the pipeline
894
+ :param token: The token to check for completion
895
+ :return: True if the pipeline has finished processing all inputs represented by the token, False otherwise
896
+ """
897
+
898
+ params = {
899
+ "token": token,
900
+ }
901
+
902
+ resp = self.http.get(
903
+ path=f"/pipelines/{quote(pipeline_name, safe='')}/completion_status",
904
+ params=params,
905
+ )
906
+
907
+ status: Optional[str] = resp.get("status")
908
+
909
+ if status is None:
910
+ raise FelderaAPIError(
911
+ f"got empty status when checking for completion status for token: {token}",
912
+ resp,
913
+ )
914
+
915
+ return status.lower() == "complete"
916
+
886
917
  def wait_for_token(
887
918
  self, pipeline_name: str, token: str, timeout_s: Optional[float] = None
888
919
  ):
@@ -896,10 +927,6 @@ Reason: The pipeline is in a STOPPED state due to the following error:
896
927
  to process these records.
897
928
  """
898
929
 
899
- params = {
900
- "token": token,
901
- }
902
-
903
930
  start = time.monotonic()
904
931
  end = start + timeout_s if timeout_s else None
905
932
  initial_backoff = 0.1
@@ -916,18 +943,7 @@ Reason: The pipeline is in a STOPPED state due to the following error:
916
943
  + f" {timeout_s}"
917
944
  )
918
945
 
919
- resp = self.http.get(
920
- path=f"/pipelines/{pipeline_name}/completion_status", params=params
921
- )
922
-
923
- status: Optional[str] = resp.get("status")
924
- if status is None:
925
- raise FelderaAPIError(
926
- f"got empty status when checking for completion status for token: {token}",
927
- resp,
928
- )
929
-
930
- if status.lower() == "complete":
946
+ if self.completion_token_processed(pipeline_name, token):
931
947
  break
932
948
 
933
949
  elapsed = time.monotonic() - start
@@ -1191,3 +1207,31 @@ Reason: The pipeline is in a STOPPED state due to the following error:
1191
1207
  buffer += chunk
1192
1208
 
1193
1209
  return buffer
1210
+
1211
+ def generate_completion_token(
1212
+ self, pipeline_name: str, table_name: str, connector_name: str
1213
+ ) -> str:
1214
+ """
1215
+ Generate a completion token that can be passed to :meth:`.FelderaClient.completion_token_processed` to
1216
+ check whether the pipeline has finished processing all inputs received from the connector before
1217
+ the token was generated.
1218
+
1219
+ :param pipeline_name: The name of the pipeline
1220
+ :param table_name: The name of the table associated with this connector.
1221
+ :param connector_name: The name of the connector.
1222
+
1223
+ :raises FelderaAPIError: If the connector cannot be found, or if the pipeline is not running.
1224
+ """
1225
+
1226
+ resp = self.http.get(
1227
+ path=f"/pipelines/{pipeline_name}/tables/{table_name}/connectors/{connector_name}/completion_token",
1228
+ )
1229
+
1230
+ token: str | None = resp.get("token")
1231
+
1232
+ if token is None:
1233
+ raise ValueError(
1234
+ "got invalid response from feldera when generating completion token"
1235
+ )
1236
+
1237
+ return token
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: feldera
3
- Version: 0.168.0
3
+ Version: 0.170.0
4
4
  Summary: The feldera python client
5
5
  Author-email: Feldera Team <dev@feldera.com>
6
6
  License: MIT
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
  name = "feldera"
7
7
  readme = "README.md"
8
8
  description = "The feldera python client"
9
- version = "0.168.0"
9
+ version = "0.170.0"
10
10
  license = { text = "MIT" }
11
11
  requires-python = ">=3.10"
12
12
  authors = [
@@ -1,69 +0,0 @@
1
- from threading import Thread
2
- from typing import Callable, Optional
3
-
4
- import pandas as pd
5
- from feldera import FelderaClient
6
- from feldera._helpers import dataframe_from_response
7
- from feldera.enums import PipelineFieldSelector
8
-
9
-
10
- class CallbackRunner(Thread):
11
- def __init__(
12
- self,
13
- client: FelderaClient,
14
- pipeline_name: str,
15
- view_name: str,
16
- callback: Callable[[pd.DataFrame, int], None],
17
- exception_callback: Callable[[BaseException], None],
18
- ):
19
- super().__init__()
20
- self.daemon = True
21
- self.client: FelderaClient = client
22
- self.pipeline_name: str = pipeline_name
23
- self.view_name: str = view_name
24
- self.callback: Callable[[pd.DataFrame, int], None] = callback
25
- self.exception_callback: Callable[[BaseException], None] = exception_callback
26
- self.schema: Optional[dict] = None
27
-
28
- def run(self):
29
- """
30
- The main loop of the thread. Listens for data and calls the callback function on each chunk of data received.
31
-
32
- :meta private:
33
- """
34
-
35
- try:
36
- pipeline = self.client.get_pipeline(
37
- self.pipeline_name, PipelineFieldSelector.ALL
38
- )
39
-
40
- schemas = pipeline.tables + pipeline.views
41
- for schema in schemas:
42
- if schema.name == self.view_name:
43
- self.schema = schema
44
- break
45
-
46
- if self.schema is None:
47
- raise ValueError(
48
- f"Table or View {self.view_name} not found in the pipeline schema."
49
- )
50
-
51
- gen_obj = self.client.listen_to_pipeline(
52
- self.pipeline_name,
53
- self.view_name,
54
- format="json",
55
- case_sensitive=self.schema.case_sensitive,
56
- )
57
-
58
- iterator = gen_obj()
59
-
60
- for chunk in iterator:
61
- chunk: dict = chunk
62
- data: Optional[list[dict]] = chunk.get("json_data")
63
- seq_no: Optional[int] = chunk.get("sequence_number")
64
- if data is not None and seq_no is not None:
65
- self.callback(
66
- dataframe_from_response([data], self.schema.fields), seq_no
67
- )
68
- except BaseException as e:
69
- self.exception_callback(e)
File without changes
File without changes
File without changes
File without changes