feldera 0.155.0__tar.gz → 0.157.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of feldera might be problematic. Click here for more details.

Files changed (32) hide show
  1. {feldera-0.155.0 → feldera-0.157.0}/PKG-INFO +1 -7
  2. {feldera-0.155.0 → feldera-0.157.0}/README.md +0 -6
  3. feldera-0.157.0/feldera/_callback_runner.py +64 -0
  4. {feldera-0.155.0 → feldera-0.157.0}/feldera/output_handler.py +1 -5
  5. {feldera-0.155.0 → feldera-0.157.0}/feldera/pipeline.py +32 -87
  6. {feldera-0.155.0 → feldera-0.157.0}/feldera.egg-info/PKG-INFO +1 -7
  7. {feldera-0.155.0 → feldera-0.157.0}/pyproject.toml +1 -1
  8. feldera-0.155.0/feldera/_callback_runner.py +0 -123
  9. {feldera-0.155.0 → feldera-0.157.0}/feldera/__init__.py +0 -0
  10. {feldera-0.155.0 → feldera-0.157.0}/feldera/_helpers.py +0 -0
  11. {feldera-0.155.0 → feldera-0.157.0}/feldera/enums.py +0 -0
  12. {feldera-0.155.0 → feldera-0.157.0}/feldera/pipeline_builder.py +0 -0
  13. {feldera-0.155.0 → feldera-0.157.0}/feldera/rest/__init__.py +0 -0
  14. {feldera-0.155.0 → feldera-0.157.0}/feldera/rest/_helpers.py +0 -0
  15. {feldera-0.155.0 → feldera-0.157.0}/feldera/rest/_httprequests.py +0 -0
  16. {feldera-0.155.0 → feldera-0.157.0}/feldera/rest/config.py +0 -0
  17. {feldera-0.155.0 → feldera-0.157.0}/feldera/rest/errors.py +0 -0
  18. {feldera-0.155.0 → feldera-0.157.0}/feldera/rest/feldera_client.py +0 -0
  19. {feldera-0.155.0 → feldera-0.157.0}/feldera/rest/feldera_config.py +0 -0
  20. {feldera-0.155.0 → feldera-0.157.0}/feldera/rest/pipeline.py +0 -0
  21. {feldera-0.155.0 → feldera-0.157.0}/feldera/rest/sql_table.py +0 -0
  22. {feldera-0.155.0 → feldera-0.157.0}/feldera/rest/sql_view.py +0 -0
  23. {feldera-0.155.0 → feldera-0.157.0}/feldera/runtime_config.py +0 -0
  24. {feldera-0.155.0 → feldera-0.157.0}/feldera/stats.py +0 -0
  25. {feldera-0.155.0 → feldera-0.157.0}/feldera/tests/test_datafusionize.py +0 -0
  26. {feldera-0.155.0 → feldera-0.157.0}/feldera/testutils.py +0 -0
  27. {feldera-0.155.0 → feldera-0.157.0}/feldera/testutils_oidc.py +0 -0
  28. {feldera-0.155.0 → feldera-0.157.0}/feldera.egg-info/SOURCES.txt +0 -0
  29. {feldera-0.155.0 → feldera-0.157.0}/feldera.egg-info/dependency_links.txt +0 -0
  30. {feldera-0.155.0 → feldera-0.157.0}/feldera.egg-info/requires.txt +0 -0
  31. {feldera-0.155.0 → feldera-0.157.0}/feldera.egg-info/top_level.txt +0 -0
  32. {feldera-0.155.0 → feldera-0.157.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: feldera
3
- Version: 0.155.0
3
+ Version: 0.157.0
4
4
  Summary: The feldera python client
5
5
  Author-email: Feldera Team <dev@feldera.com>
6
6
  License: MIT
@@ -60,12 +60,6 @@ source .venv/activate
60
60
  uv pip install .
61
61
  ```
62
62
 
63
- You also have to install the `pytest` module:
64
-
65
- ```bash
66
- python3 -m pip install pytest
67
- ```
68
-
69
63
  ## Documentation
70
64
 
71
65
  The Python SDK documentation is available at
@@ -36,12 +36,6 @@ source .venv/activate
36
36
  uv pip install .
37
37
  ```
38
38
 
39
- You also have to install the `pytest` module:
40
-
41
- ```bash
42
- python3 -m pip install pytest
43
- ```
44
-
45
39
  ## Documentation
46
40
 
47
41
  The Python SDK documentation is available at
@@ -0,0 +1,64 @@
1
+ from threading import Thread
2
+ from typing import Callable, Optional
3
+
4
+ import pandas as pd
5
+ from feldera import FelderaClient
6
+ from feldera._helpers import dataframe_from_response
7
+ from feldera.enums import PipelineFieldSelector
8
+
9
+
10
+ class CallbackRunner(Thread):
11
+ def __init__(
12
+ self,
13
+ client: FelderaClient,
14
+ pipeline_name: str,
15
+ view_name: str,
16
+ callback: Callable[[pd.DataFrame, int], None],
17
+ ):
18
+ super().__init__()
19
+ self.daemon = True
20
+ self.client: FelderaClient = client
21
+ self.pipeline_name: str = pipeline_name
22
+ self.view_name: str = view_name
23
+ self.callback: Callable[[pd.DataFrame, int], None] = callback
24
+ self.schema: Optional[dict] = None
25
+
26
+ def run(self):
27
+ """
28
+ The main loop of the thread. Listens for data and calls the callback function on each chunk of data received.
29
+
30
+ :meta private:
31
+ """
32
+
33
+ pipeline = self.client.get_pipeline(
34
+ self.pipeline_name, PipelineFieldSelector.ALL
35
+ )
36
+
37
+ schemas = pipeline.tables + pipeline.views
38
+ for schema in schemas:
39
+ if schema.name == self.view_name:
40
+ self.schema = schema
41
+ break
42
+
43
+ if self.schema is None:
44
+ raise ValueError(
45
+ f"Table or View {self.view_name} not found in the pipeline schema."
46
+ )
47
+
48
+ gen_obj = self.client.listen_to_pipeline(
49
+ self.pipeline_name,
50
+ self.view_name,
51
+ format="json",
52
+ case_sensitive=self.schema.case_sensitive,
53
+ )
54
+
55
+ iterator = gen_obj()
56
+
57
+ for chunk in iterator:
58
+ chunk: dict = chunk
59
+ data: Optional[list[dict]] = chunk.get("json_data")
60
+ seq_no: Optional[int] = chunk.get("sequence_number")
61
+ if data is not None and seq_no is not None:
62
+ self.callback(
63
+ dataframe_from_response([data], self.schema.fields), seq_no
64
+ )
@@ -1,7 +1,5 @@
1
1
  import pandas as pd
2
- from typing import Optional
3
2
 
4
- from queue import Queue
5
3
  from feldera import FelderaClient
6
4
  from feldera._callback_runner import CallbackRunner
7
5
 
@@ -12,7 +10,6 @@ class OutputHandler:
12
10
  client: FelderaClient,
13
11
  pipeline_name: str,
14
12
  view_name: str,
15
- queue: Optional[Queue],
16
13
  ):
17
14
  """
18
15
  Initializes the output handler, but doesn't start it.
@@ -22,7 +19,6 @@ class OutputHandler:
22
19
  self.client: FelderaClient = client
23
20
  self.pipeline_name: str = pipeline_name
24
21
  self.view_name: str = view_name
25
- self.queue: Optional[Queue] = queue
26
22
  self.buffer: list[pd.DataFrame] = []
27
23
 
28
24
  # the callback that is passed to the `CallbackRunner`
@@ -32,7 +28,7 @@ class OutputHandler:
32
28
 
33
29
  # sets up the callback runner
34
30
  self.handler = CallbackRunner(
35
- self.client, self.pipeline_name, self.view_name, callback, queue
31
+ self.client, self.pipeline_name, self.view_name, callback
36
32
  )
37
33
 
38
34
  def start(self):
@@ -8,7 +8,6 @@ from uuid import UUID
8
8
 
9
9
  from typing import List, Dict, Callable, Optional, Generator, Mapping, Any
10
10
  from collections import deque
11
- from queue import Queue
12
11
 
13
12
  from feldera.rest.errors import FelderaAPIError
14
13
  from feldera.enums import (
@@ -26,7 +25,7 @@ from feldera.enums import (
26
25
  )
27
26
  from feldera.rest.pipeline import Pipeline as InnerPipeline
28
27
  from feldera.rest.feldera_client import FelderaClient
29
- from feldera._callback_runner import _CallbackRunnerInstruction, CallbackRunner
28
+ from feldera._callback_runner import CallbackRunner
30
29
  from feldera.output_handler import OutputHandler
31
30
  from feldera._helpers import ensure_dataframe_has_columns, chunk_dataframe
32
31
  from feldera.rest.sql_table import SQLTable
@@ -39,7 +38,6 @@ class Pipeline:
39
38
  def __init__(self, client: FelderaClient):
40
39
  self.client: FelderaClient = client
41
40
  self._inner: InnerPipeline | None = None
42
- self.views_tx: List[Dict[str, Queue]] = []
43
41
 
44
42
  @staticmethod
45
43
  def _from_inner(inner: InnerPipeline, client: FelderaClient) -> "Pipeline":
@@ -47,20 +45,6 @@ class Pipeline:
47
45
  pipeline._inner = inner
48
46
  return pipeline
49
47
 
50
- def __setup_output_listeners(self):
51
- """
52
- Internal function used to set up the output listeners.
53
-
54
- :meta private:
55
- """
56
-
57
- for view_queue in self.views_tx:
58
- for view_name, queue in view_queue.items():
59
- # sends a message to the callback runner to start listening
60
- queue.put(_CallbackRunnerInstruction.PipelineStarted)
61
- # block until the callback runner is ready
62
- queue.join()
63
-
64
48
  def refresh(self, field_selector: PipelineFieldSelector):
65
49
  """
66
50
  Calls the backend to get the updated, latest version of the pipeline.
@@ -237,23 +221,21 @@ class Pipeline:
237
221
  def listen(self, view_name: str) -> OutputHandler:
238
222
  """
239
223
  Follow the change stream (i.e., the output) of the provided view.
240
- Returns an output handler to read the changes.
224
+ Returns an output handle to read the changes.
241
225
 
242
- When the pipeline is stopped, these listeners are dropped.
226
+ When the pipeline is stopped, the handle is dropped.
243
227
 
244
- You must call this method before starting the pipeline to get the entire output of the view.
245
- If this method is called once the pipeline has started, you will only get the output from that point onwards.
228
+ The handle will only receive changes from the point in time when the listener is created.
229
+ In order to receive all changes since the pipeline started, you can create the pipeline in the `PAUSED` state
230
+ using :meth:`start_paused`, attach listeners and unpause the pipeline using :meth:`resume`.
246
231
 
247
232
  :param view_name: The name of the view to listen to.
248
233
  """
249
234
 
250
- queue: Optional[Queue] = None
251
-
252
235
  if self.status() not in [PipelineStatus.PAUSED, PipelineStatus.RUNNING]:
253
- queue = Queue(maxsize=1)
254
- self.views_tx.append({view_name: queue})
236
+ raise RuntimeError("Pipeline must be running or paused to listen to output")
255
237
 
256
- handler = OutputHandler(self.client, self.name, view_name, queue)
238
+ handler = OutputHandler(self.client, self.name, view_name)
257
239
  handler.start()
258
240
 
259
241
  return handler
@@ -264,8 +246,9 @@ class Pipeline:
264
246
  """
265
247
  Run the given callback on each chunk of the output of the specified view.
266
248
 
267
- You must call this method before starting the pipeline to operate on the entire output.
268
- You can call this method after the pipeline has started, but you will only get the output from that point onwards.
249
+ The callback will only receive changes from the point in time when the listener is created.
250
+ In order to receive all changes since the pipeline started, you can create the pipeline in the `PAUSED` state
251
+ using :meth:`start_paused`, attach listeners and unpause the pipeline using :meth:`resume`.
269
252
 
270
253
  :param view_name: The name of the view.
271
254
  :param callback: The callback to run on each chunk. The callback should take two arguments:
@@ -283,13 +266,10 @@ class Pipeline:
283
266
 
284
267
  """
285
268
 
286
- queue: Optional[Queue] = None
287
-
288
269
  if self.status() not in [PipelineStatus.RUNNING, PipelineStatus.PAUSED]:
289
- queue = Queue(maxsize=1)
290
- self.views_tx.append({view_name: queue})
270
+ raise RuntimeError("Pipeline must be running or paused to listen to output")
291
271
 
292
- handler = CallbackRunner(self.client, self.name, view_name, callback, queue)
272
+ handler = CallbackRunner(self.client, self.name, view_name, callback)
293
273
  handler.start()
294
274
 
295
275
  def wait_for_completion(
@@ -365,46 +345,6 @@ class Pipeline:
365
345
 
366
346
  return self.stats().global_metrics.pipeline_complete
367
347
 
368
- def start(self, wait: bool = True, timeout_s: Optional[float] = None):
369
- """
370
- .. _start:
371
-
372
- Starts this pipeline.
373
-
374
- - The pipeline must be in STOPPED state to start.
375
- - If the pipeline is in any other state, an error will be raised.
376
- - If the pipeline is in PAUSED state, use `.meth:resume` instead.
377
-
378
- :param timeout_s: The maximum time (in seconds) to wait for the
379
- pipeline to start.
380
- :param wait: Set True to wait for the pipeline to start. True by default
381
-
382
- :raises RuntimeError: If the pipeline is not in STOPPED state.
383
- """
384
-
385
- status = self.status()
386
- if status != PipelineStatus.STOPPED:
387
- raise RuntimeError(
388
- f"""Cannot start pipeline '{self.name}' in state \
389
- '{str(status.name)}'. The pipeline must be in STOPPED state before it can be \
390
- started. You can either stop the pipeline using the `Pipeline.stop()` \
391
- method or use `Pipeline.resume()` to resume a paused pipeline."""
392
- )
393
-
394
- if not wait:
395
- if len(self.views_tx) > 0:
396
- raise ValueError(
397
- "cannot start with 'wait=False' when output listeners are configured. Try setting 'wait=True'."
398
- )
399
-
400
- self.client.start_pipeline(self.name, wait=wait)
401
-
402
- return
403
-
404
- self.client.start_pipeline_as_paused(self.name, wait=wait, timeout_s=timeout_s)
405
- self.__setup_output_listeners()
406
- self.resume(timeout_s=timeout_s)
407
-
408
348
  def restart(self, timeout_s: Optional[float] = None):
409
349
  """
410
350
  Restarts the pipeline.
@@ -511,6 +451,25 @@ metrics"""
511
451
 
512
452
  self.client.activate_pipeline(self.name, wait=wait, timeout_s=timeout_s)
513
453
 
454
+ def start(self, wait: bool = True, timeout_s: Optional[float] = None):
455
+ """
456
+ .. _start:
457
+
458
+ Starts this pipeline.
459
+
460
+ - The pipeline must be in STOPPED state to start.
461
+ - If the pipeline is in any other state, an error will be raised.
462
+ - If the pipeline is in PAUSED state, use :meth:`resume` instead.
463
+
464
+ :param timeout_s: The maximum time (in seconds) to wait for the
465
+ pipeline to start.
466
+ :param wait: Set True to wait for the pipeline to start. True by default
467
+
468
+ :raises RuntimeError: If the pipeline is not in STOPPED state.
469
+ """
470
+
471
+ self.client.start_pipeline(self.name, wait=wait, timeout_s=timeout_s)
472
+
514
473
  def start_paused(self, wait: bool = True, timeout_s: Optional[float] = None):
515
474
  """
516
475
  Starts the pipeline in the paused state.
@@ -554,20 +513,6 @@ metrics"""
554
513
  pipeline to stop.
555
514
  """
556
515
 
557
- if wait:
558
- for view_queue in self.views_tx:
559
- for _, queue in view_queue.items():
560
- # sends a message to the callback runner to stop listening
561
- queue.put(_CallbackRunnerInstruction.RanToCompletion)
562
-
563
- if len(self.views_tx) > 0:
564
- while self.views_tx:
565
- view = self.views_tx.pop()
566
- for view_name, queue in view.items():
567
- # block until the callback runner has been stopped
568
- queue.join()
569
-
570
- time.sleep(3)
571
516
  self.client.stop_pipeline(
572
517
  self.name, force=force, wait=wait, timeout_s=timeout_s
573
518
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: feldera
3
- Version: 0.155.0
3
+ Version: 0.157.0
4
4
  Summary: The feldera python client
5
5
  Author-email: Feldera Team <dev@feldera.com>
6
6
  License: MIT
@@ -60,12 +60,6 @@ source .venv/activate
60
60
  uv pip install .
61
61
  ```
62
62
 
63
- You also have to install the `pytest` module:
64
-
65
- ```bash
66
- python3 -m pip install pytest
67
- ```
68
-
69
63
  ## Documentation
70
64
 
71
65
  The Python SDK documentation is available at
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
  name = "feldera"
7
7
  readme = "README.md"
8
8
  description = "The feldera python client"
9
- version = "0.155.0"
9
+ version = "0.157.0"
10
10
  license = { text = "MIT" }
11
11
  requires-python = ">=3.10"
12
12
  authors = [
@@ -1,123 +0,0 @@
1
- from enum import Enum
2
- from threading import Thread
3
- from typing import Callable, Optional
4
- from queue import Queue, Empty
5
-
6
- import pandas as pd
7
- from feldera import FelderaClient
8
- from feldera._helpers import dataframe_from_response
9
- from feldera.enums import PipelineFieldSelector
10
-
11
-
12
- class _CallbackRunnerInstruction(Enum):
13
- PipelineStarted = 1
14
- RanToCompletion = 2
15
-
16
-
17
- class CallbackRunner(Thread):
18
- def __init__(
19
- self,
20
- client: FelderaClient,
21
- pipeline_name: str,
22
- view_name: str,
23
- callback: Callable[[pd.DataFrame, int], None],
24
- queue: Optional[Queue],
25
- ):
26
- super().__init__()
27
- self.daemon = True
28
- self.client: FelderaClient = client
29
- self.pipeline_name: str = pipeline_name
30
- self.view_name: str = view_name
31
- self.callback: Callable[[pd.DataFrame, int], None] = callback
32
- self.queue: Optional[Queue] = queue
33
- self.schema: Optional[dict] = None
34
-
35
- def run(self):
36
- """
37
- The main loop of the thread. Listens for data and calls the callback function on each chunk of data received.
38
-
39
- :meta private:
40
- """
41
-
42
- pipeline = self.client.get_pipeline(
43
- self.pipeline_name, PipelineFieldSelector.ALL
44
- )
45
-
46
- schemas = pipeline.tables + pipeline.views
47
- for schema in schemas:
48
- if schema.name == self.view_name:
49
- self.schema = schema
50
- break
51
-
52
- if self.schema is None:
53
- raise ValueError(
54
- f"Table or View {self.view_name} not found in the pipeline schema."
55
- )
56
-
57
- # by default, we assume that the pipeline has been started
58
- ack = _CallbackRunnerInstruction.PipelineStarted
59
-
60
- # if there is Queue, we wait for the instruction to start the pipeline
61
- # this means that we are listening to the pipeline before running it, therefore, all data should be received
62
- if self.queue:
63
- ack = self.queue.get()
64
-
65
- match ack:
66
- # if the pipeline has actually been started, we start a listener
67
- case _CallbackRunnerInstruction.PipelineStarted:
68
- # listen to the pipeline
69
- gen_obj = self.client.listen_to_pipeline(
70
- self.pipeline_name,
71
- self.view_name,
72
- format="json",
73
- case_sensitive=self.schema.case_sensitive,
74
- )
75
-
76
- # if there is a queue set up, inform the main thread that the listener has been started, and it can
77
- # proceed with starting the pipeline
78
- if self.queue:
79
- # stop blocking the main thread on `join` for the previous message
80
- self.queue.task_done()
81
-
82
- iterator = gen_obj()
83
-
84
- for chunk in iterator:
85
- chunk: dict = chunk
86
- data: Optional[list[dict]] = chunk.get("json_data")
87
- seq_no: Optional[int] = chunk.get("sequence_number")
88
- if data is not None and seq_no is not None:
89
- self.callback(
90
- dataframe_from_response([data], self.schema.fields), seq_no
91
- )
92
-
93
- if self.queue:
94
- try:
95
- # if a non-blocking way, check if the queue has received further instructions
96
- # this should be a RanToCompletion instruction, which means that the pipeline has been
97
- # completed
98
- again_ack = self.queue.get_nowait()
99
-
100
- # if the queue has received a message
101
- if again_ack:
102
- match again_ack:
103
- case _CallbackRunnerInstruction.RanToCompletion:
104
- # stop blocking the main thread on `join` and return from this thread
105
- self.queue.task_done()
106
-
107
- return
108
-
109
- case _CallbackRunnerInstruction.PipelineStarted:
110
- # if the pipeline has been started again, which shouldn't happen,
111
- # ignore it and continue listening, call `task_done` to avoid blocking the main
112
- # thread on `join`
113
- self.queue.task_done()
114
-
115
- continue
116
- except Empty:
117
- # if the queue is empty, continue listening
118
- continue
119
-
120
- case _CallbackRunnerInstruction.RanToCompletion:
121
- if self.queue:
122
- self.queue.task_done()
123
- return
File without changes
File without changes
File without changes
File without changes
File without changes