feldera 0.69.0__py3-none-any.whl → 0.189.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of feldera might be problematic. Click here for more details.

@@ -1,16 +1,20 @@
1
- import pathlib
2
- from typing import Any, Dict, Optional
1
+ import json
3
2
  import logging
3
+ import pathlib
4
4
  import time
5
- import json
6
5
  from decimal import Decimal
7
- from typing import Generator
6
+ from typing import Any, Dict, Generator, Mapping, Optional
7
+ from urllib.parse import quote
8
+
9
+ import requests
8
10
 
11
+ from feldera.enums import BootstrapPolicy, PipelineFieldSelector, PipelineStatus
12
+ from feldera.rest._helpers import determine_client_version
13
+ from feldera.rest._httprequests import HttpRequests
9
14
  from feldera.rest.config import Config
15
+ from feldera.rest.errors import FelderaAPIError, FelderaTimeoutError
10
16
  from feldera.rest.feldera_config import FelderaConfig
11
- from feldera.rest.errors import FelderaTimeoutError
12
17
  from feldera.rest.pipeline import Pipeline
13
- from feldera.rest._httprequests import HttpRequests
14
18
 
15
19
 
16
20
  def _validate_no_none_keys_in_map(data):
@@ -34,35 +38,65 @@ def _prepare_boolean_input(value: bool) -> str:
34
38
 
35
39
  class FelderaClient:
36
40
  """
37
- A client for the Feldera HTTP API
41
+ A client for the Feldera HTTP API.
38
42
 
39
- A client instance is needed for every Feldera API method to know the location of
40
- Feldera and its permissions.
43
+ The client is initialized with the configuration needed for interacting with the
44
+ Feldera HTTP API, which it uses in its calls. Its methods are implemented
45
+ by issuing one or more HTTP requests to the API, and as such can provide higher
46
+ level operations (e.g., support waiting for the success of asynchronous HTTP API
47
+ functionality).
41
48
  """
42
49
 
43
50
  def __init__(
44
51
  self,
45
- url: str,
52
+ url: Optional[str] = None,
46
53
  api_key: Optional[str] = None,
47
54
  timeout: Optional[float] = None,
48
- requests_verify: bool = True,
55
+ connection_timeout: Optional[float] = None,
56
+ requests_verify: Optional[bool | str] = None,
49
57
  ) -> None:
50
58
  """
51
- :param url: The url to Feldera API (ex: https://try.feldera.com)
52
- :param api_key: The optional API key for Feldera
53
- :param timeout: (optional) The amount of time in seconds that the client will wait for a response before timing
54
- out.
55
- :param requests_verify: The `verify` parameter passed to the requests
56
- library. `True` by default.
59
+ Constructs a Feldera client.
60
+
61
+ :param url: (Optional) URL to the Feldera API.
62
+ The default is read from the `FELDERA_HOST` environment variable;
63
+ if the variable is not set, the default is `"http://localhost:8080"`.
64
+ :param api_key: (Optional) API key to access Feldera (format: `"apikey:..."`).
65
+ The default is read from the `FELDERA_API_KEY` environment variable;
66
+ if the variable is not set, the default is `None` (no API key is provided).
67
+ :param timeout: (Optional) Duration in seconds that the client will wait to receive
68
+ a response of an HTTP request, after which it times out.
69
+ The default is `None` (wait indefinitely; no timeout is enforced).
70
+ :param connection_timeout: (Optional) Duration in seconds that the client will wait
71
+ to establish the connection of an HTTP request, after which it times out.
72
+ The default is `None` (wait indefinitely; no timeout is enforced).
73
+ :param requests_verify: (Optional) The `verify` parameter passed to the `requests` library,
74
+ which is used internally to perform HTTP requests.
75
+ See also: https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification .
76
+ The default is based on the `FELDERA_HTTPS_TLS_CERT` or the `FELDERA_TLS_INSECURE` environment variable.
77
+ By setting `FELDERA_HTTPS_TLS_CERT` to a path, the default becomes the CA bundle it points to.
78
+ By setting `FELDERA_TLS_INSECURE` to `"1"`, `"true"` or `"yes"` (all case-insensitive), the default becomes
79
+ `False` which means to disable TLS verification by default. If both variables are set, the former takes
80
+ priority over the latter. If neither variable is set, the default is `True`.
57
81
  """
58
82
 
59
83
  self.config = Config(
60
- url, api_key, timeout=timeout, requests_verify=requests_verify
84
+ url=url,
85
+ api_key=api_key,
86
+ timeout=timeout,
87
+ connection_timeout=connection_timeout,
88
+ requests_verify=requests_verify,
61
89
  )
62
90
  self.http = HttpRequests(self.config)
63
91
 
64
92
  try:
65
- self.pipelines()
93
+ client_version = determine_client_version()
94
+ server_config = self.get_config()
95
+ if client_version != server_config.version:
96
+ logging.warning(
97
+ f"Feldera client is on version {client_version} while server is at "
98
+ f"{server_config.version}. There could be incompatibilities."
99
+ )
66
100
  except Exception as e:
67
101
  logging.error(f"Failed to connect to Feldera API: {e}")
68
102
  raise e
@@ -75,35 +109,46 @@ class FelderaClient:
75
109
 
76
110
  return FelderaClient(f"http://127.0.0.1:{port}")
77
111
 
78
- def get_pipeline(self, pipeline_name) -> Pipeline:
112
+ def get_pipeline(
113
+ self, pipeline_name: str, field_selector: PipelineFieldSelector
114
+ ) -> Pipeline:
79
115
  """
80
116
  Get a pipeline by name
81
117
 
82
118
  :param pipeline_name: The name of the pipeline
119
+ :param field_selector: Choose what pipeline information to refresh; see PipelineFieldSelector enum definition.
83
120
  """
84
121
 
85
- resp = self.http.get(f"/pipelines/{pipeline_name}")
122
+ resp = self.http.get(
123
+ f"/pipelines/{pipeline_name}?selector={field_selector.value}"
124
+ )
86
125
 
87
126
  return Pipeline.from_dict(resp)
88
127
 
89
- def get_runtime_config(self, pipeline_name) -> dict:
128
+ def get_runtime_config(self, pipeline_name) -> Mapping[str, Any]:
90
129
  """
91
130
  Get the runtime config of a pipeline by name
92
131
 
93
132
  :param pipeline_name: The name of the pipeline
94
133
  """
95
134
 
96
- resp: dict = self.http.get(f"/pipelines/{pipeline_name}")
135
+ resp: Mapping[str, Any] = self.http.get(f"/pipelines/{pipeline_name}")
136
+
137
+ runtime_config: Mapping[str, Any] | None = resp.get("runtime_config")
138
+ if runtime_config is None:
139
+ raise ValueError(f"Pipeline {pipeline_name} has no runtime config")
97
140
 
98
- return resp.get("runtime_config")
141
+ return runtime_config
99
142
 
100
- def pipelines(self) -> list[Pipeline]:
143
+ def pipelines(
144
+ self, selector: PipelineFieldSelector = PipelineFieldSelector.STATUS
145
+ ) -> list[Pipeline]:
101
146
  """
102
147
  Get all pipelines
103
148
  """
104
149
 
105
150
  resp = self.http.get(
106
- path="/pipelines",
151
+ path=f"/pipelines?selector={selector.value}",
107
152
  )
108
153
 
109
154
  return [Pipeline.from_dict(pipeline) for pipeline in resp]
@@ -112,12 +157,14 @@ class FelderaClient:
112
157
  wait = ["Pending", "CompilingSql", "SqlCompiled", "CompilingRust"]
113
158
 
114
159
  while True:
115
- p = self.get_pipeline(name)
160
+ p = self.get_pipeline(name, PipelineFieldSelector.STATUS)
116
161
  status = p.program_status
117
162
 
118
163
  if status == "Success":
119
- return p
164
+ return self.get_pipeline(name, PipelineFieldSelector.ALL)
120
165
  elif status not in wait:
166
+ p = self.get_pipeline(name, PipelineFieldSelector.ALL)
167
+
121
168
  # error handling for SQL compilation errors
122
169
  if status == "SqlError":
123
170
  sql_errors = p.program_error["sql_compilation"]["messages"]
@@ -130,17 +177,107 @@ class FelderaClient:
130
177
  err_msg += f"Code snippet:\n{sql_error['snippet']}"
131
178
  raise RuntimeError(err_msg)
132
179
 
133
- raise RuntimeError(f"The program failed to compile: {status}")
180
+ error_message = f"The program failed to compile: {status}\n"
181
+
182
+ rust_error = p.program_error.get("rust_compilation")
183
+ if rust_error is not None:
184
+ error_message += f"Rust Error: {rust_error}\n"
185
+
186
+ system_error = p.program_error.get("system_error")
187
+ if system_error is not None:
188
+ error_message += f"System Error: {system_error}"
189
+
190
+ raise RuntimeError(error_message)
134
191
 
135
192
  logging.debug("still compiling %s, waiting for 100 more milliseconds", name)
136
193
  time.sleep(0.1)
137
194
 
138
- def create_pipeline(self, pipeline: Pipeline) -> Pipeline:
195
+ def __wait_for_pipeline_state(
196
+ self,
197
+ pipeline_name: str,
198
+ state: str,
199
+ timeout_s: Optional[float] = None,
200
+ start: bool = True,
201
+ ):
202
+ start_time = time.monotonic()
203
+
204
+ while True:
205
+ if timeout_s is not None:
206
+ elapsed = time.monotonic() - start_time
207
+ if elapsed > timeout_s:
208
+ raise TimeoutError(
209
+ f"Timed out waiting for pipeline {pipeline_name} to "
210
+ f"transition to '{state}' state"
211
+ )
212
+
213
+ resp = self.get_pipeline(pipeline_name, PipelineFieldSelector.STATUS)
214
+ status = resp.deployment_status
215
+
216
+ if status.lower() == state.lower():
217
+ break
218
+ elif (
219
+ status == "Stopped"
220
+ and len(resp.deployment_error or {}) > 0
221
+ and resp.deployment_desired_status == "Stopped"
222
+ ):
223
+ err_msg = "Unable to START the pipeline:\n" if start else ""
224
+ raise RuntimeError(
225
+ f"""{err_msg}Unable to transition the pipeline to '{state}'.
226
+ Reason: The pipeline is in a STOPPED state due to the following error:
227
+ {resp.deployment_error.get("message", "")}"""
228
+ )
229
+
230
+ logging.debug(
231
+ "still starting %s, waiting for 100 more milliseconds", pipeline_name
232
+ )
233
+ time.sleep(0.1)
234
+
235
+ def __wait_for_pipeline_state_one_of(
236
+ self,
237
+ pipeline_name: str,
238
+ states: list[str],
239
+ timeout_s: float | None = None,
240
+ start: bool = True,
241
+ ) -> PipelineStatus:
242
+ start_time = time.monotonic()
243
+ states = [state.lower() for state in states]
244
+
245
+ while True:
246
+ if timeout_s is not None:
247
+ elapsed = time.monotonic() - start_time
248
+ if elapsed > timeout_s:
249
+ raise TimeoutError(
250
+ f"Timed out waiting for pipeline {pipeline_name} to"
251
+ f"transition to one of the states: {states}"
252
+ )
253
+
254
+ resp = self.get_pipeline(pipeline_name, PipelineFieldSelector.STATUS)
255
+ status = resp.deployment_status
256
+
257
+ if status.lower() in states:
258
+ return PipelineStatus.from_str(status)
259
+ elif (
260
+ status == "Stopped"
261
+ and len(resp.deployment_error or {}) > 0
262
+ and resp.deployment_desired_status == "Stopped"
263
+ ):
264
+ err_msg = "Unable to START the pipeline:\n" if start else ""
265
+ raise RuntimeError(
266
+ f"""{err_msg}Unable to transition the pipeline to one of the states: {states}.
267
+ Reason: The pipeline is in a STOPPED state due to the following error:
268
+ {resp.deployment_error.get("message", "")}"""
269
+ )
270
+ logging.debug(
271
+ "still starting %s, waiting for 100 more milliseconds", pipeline_name
272
+ )
273
+ time.sleep(0.1)
274
+
275
+ def create_pipeline(self, pipeline: Pipeline, wait: bool = True) -> Pipeline:
139
276
  """
140
277
  Create a pipeline if it doesn't exist and wait for it to compile
141
278
 
142
-
143
- :name: The name of the pipeline
279
+ :param pipeline: The pipeline to create
280
+ :param wait: Whether to wait for the pipeline to compile. True by default
144
281
  """
145
282
 
146
283
  body = {
@@ -158,11 +295,21 @@ class FelderaClient:
158
295
  body=body,
159
296
  )
160
297
 
298
+ if not wait:
299
+ return pipeline
300
+
161
301
  return self.__wait_for_compilation(pipeline.name)
162
302
 
163
- def create_or_update_pipeline(self, pipeline: Pipeline) -> Pipeline:
303
+ def create_or_update_pipeline(
304
+ self, pipeline: Pipeline, wait: bool = True
305
+ ) -> Pipeline:
164
306
  """
165
- Create a pipeline if it doesn't exist or update a pipeline and wait for it to compile
307
+ Create a pipeline if it doesn't exist or update a pipeline and wait for
308
+ it to compile
309
+
310
+ :param pipeline: The pipeline to create or update
311
+ :param wait: Whether to wait for the pipeline to compile. True by default
312
+ :return: The created or updated pipeline
166
313
  """
167
314
 
168
315
  body = {
@@ -180,21 +327,54 @@ class FelderaClient:
180
327
  body=body,
181
328
  )
182
329
 
330
+ if not wait:
331
+ return pipeline
332
+
183
333
  return self.__wait_for_compilation(pipeline.name)
184
334
 
185
- def patch_pipeline(self, name: str, sql: str):
335
+ def patch_pipeline(
336
+ self,
337
+ name: str,
338
+ sql: Optional[str] = None,
339
+ udf_rust: Optional[str] = None,
340
+ udf_toml: Optional[str] = None,
341
+ program_config: Optional[Mapping[str, Any]] = None,
342
+ runtime_config: Optional[Mapping[str, Any]] = None,
343
+ description: Optional[str] = None,
344
+ ):
186
345
  """
187
- Incrementally update the pipeline SQL
346
+ Incrementally update pipeline
188
347
 
189
348
  :param name: The name of the pipeline
190
- :param sql: The SQL snippet. Replaces the existing SQL code with this one.
191
349
  """
192
350
 
193
351
  self.http.patch(
194
352
  path=f"/pipelines/{name}",
195
- body={"program_code": sql},
353
+ body={
354
+ "program_code": sql,
355
+ "udf_rust": udf_rust,
356
+ "udf_toml": udf_toml,
357
+ "program_config": program_config,
358
+ "runtime_config": runtime_config,
359
+ "description": description,
360
+ },
196
361
  )
197
362
 
363
+ def testing_force_update_platform_version(self, name: str, platform_version: str):
364
+ self.http.post(
365
+ path=f"/pipelines/{name}/testing",
366
+ params={"set_platform_version": platform_version},
367
+ )
368
+
369
+ def update_pipeline_runtime(self, name: str):
370
+ """
371
+ Recompile a pipeline with the Feldera runtime version included in the currently installed Feldera platform.
372
+
373
+ :param name: The name of the pipeline
374
+ """
375
+
376
+ self.http.post(path=f"/pipelines/{name}/update_runtime")
377
+
198
378
  def delete_pipeline(self, name: str):
199
379
  """
200
380
  Deletes a pipeline by name
@@ -218,173 +398,360 @@ class FelderaClient:
218
398
 
219
399
  return resp
220
400
 
221
- def start_pipeline(self, pipeline_name: str, timeout_s: Optional[float] = 300):
401
+ def get_pipeline_logs(self, pipeline_name: str) -> Generator[str, None, None]:
402
+ """
403
+ Get the pipeline logs
404
+
405
+ :param name: The name of the pipeline
406
+ :return: A generator yielding the logs, one line at a time.
407
+ """
408
+ chunk: bytes
409
+ with self.http.get(
410
+ path=f"/pipelines/{pipeline_name}/logs",
411
+ stream=True,
412
+ ) as resp:
413
+ for chunk in resp.iter_lines(chunk_size=50000000):
414
+ if chunk:
415
+ yield chunk.decode("utf-8")
416
+
417
+ def activate_pipeline(
418
+ self, pipeline_name: str, wait: bool = True, timeout_s: Optional[float] = None
419
+ ) -> Optional[PipelineStatus]:
420
+ """
421
+
422
+ :param pipeline_name: The name of the pipeline to activate
423
+ :param wait: Set True to wait for the pipeline to activate. True by
424
+ default
425
+ :param timeout_s: The amount of time in seconds to wait for the
426
+ pipeline to activate.
427
+ """
428
+
429
+ self.http.post(
430
+ path=f"/pipelines/{pipeline_name}/activate",
431
+ )
432
+
433
+ if not wait:
434
+ return None
435
+
436
+ return self.__wait_for_pipeline_state_one_of(
437
+ pipeline_name, ["running", "AwaitingApproval"], timeout_s
438
+ )
439
+
440
+ def _inner_start_pipeline(
441
+ self,
442
+ pipeline_name: str,
443
+ initial: str = "running",
444
+ bootstrap_policy: Optional[BootstrapPolicy] = None,
445
+ wait: bool = True,
446
+ timeout_s: Optional[float] = None,
447
+ ) -> Optional[PipelineStatus]:
222
448
  """
223
449
 
224
450
  :param pipeline_name: The name of the pipeline to start
225
- :param timeout_s: The amount of time in seconds to wait for the pipeline to start. 300 seconds by default.
451
+ :param initial: The initial state to start the pipeline in. "running"
452
+ by default.
453
+ :param wait: Set True to wait for the pipeline to start. True by default
454
+ :param timeout_s: The amount of time in seconds to wait for the
455
+ pipeline to start.
226
456
  """
227
457
 
228
- if timeout_s is None:
229
- timeout_s = 300
458
+ params = {"initial": initial}
459
+ if bootstrap_policy is not None:
460
+ params["bootstrap_policy"] = bootstrap_policy.value
230
461
 
231
462
  self.http.post(
232
463
  path=f"/pipelines/{pipeline_name}/start",
464
+ params=params,
233
465
  )
234
466
 
235
- start_time = time.monotonic()
467
+ if not wait:
468
+ return None
236
469
 
237
- while True:
238
- if timeout_s is not None:
239
- elapsed = time.monotonic() - start_time
240
- if elapsed > timeout_s:
241
- raise TimeoutError(
242
- f"Timed out waiting for pipeline {pipeline_name} to start"
243
- )
470
+ return self.__wait_for_pipeline_state_one_of(
471
+ pipeline_name, [initial, "AwaitingApproval"], timeout_s
472
+ )
244
473
 
245
- resp = self.get_pipeline(pipeline_name)
246
- status = resp.deployment_status
474
+ def start_pipeline(
475
+ self,
476
+ pipeline_name: str,
477
+ bootstrap_policy: Optional[BootstrapPolicy] = None,
478
+ wait: bool = True,
479
+ timeout_s: Optional[float] = None,
480
+ ) -> Optional[PipelineStatus]:
481
+ """
247
482
 
248
- if status == "Running":
249
- break
250
- elif status == "Failed":
251
- raise RuntimeError(
252
- f"""Unable to START the pipeline.
253
- Reason: The pipeline is in a FAILED state due to the following error:
254
- {resp.deployment_error.get("message", "")}"""
255
- )
483
+ :param pipeline_name: The name of the pipeline to start
484
+ :param wait: Set True to wait for the pipeline to start.
485
+ True by default
486
+ :param timeout_s: The amount of time in seconds to wait for the
487
+ pipeline to start.
488
+ """
256
489
 
257
- logging.debug(
258
- "still starting %s, waiting for 100 more milliseconds", pipeline_name
259
- )
260
- time.sleep(0.1)
490
+ return self._inner_start_pipeline(
491
+ pipeline_name, "running", bootstrap_policy, wait, timeout_s
492
+ )
493
+
494
+ def start_pipeline_as_paused(
495
+ self,
496
+ pipeline_name: str,
497
+ bootstrap_policy: Optional[BootstrapPolicy] = None,
498
+ wait: bool = True,
499
+ timeout_s: float | None = None,
500
+ ) -> Optional[PipelineStatus]:
501
+ """
502
+ :param pipeline_name: The name of the pipeline to start as paused.
503
+ :param wait: Set True to wait for the pipeline to start as pause.
504
+ True by default
505
+ :param timeout_s: The amount of time in seconds to wait for the
506
+ pipeline to start.
507
+ """
508
+
509
+ return self._inner_start_pipeline(
510
+ pipeline_name, "paused", bootstrap_policy, wait, timeout_s
511
+ )
512
+
513
+ def start_pipeline_as_standby(
514
+ self,
515
+ pipeline_name: str,
516
+ bootstrap_policy: Optional[BootstrapPolicy] = None,
517
+ wait: bool = True,
518
+ timeout_s: Optional[float] = None,
519
+ ):
520
+ """
521
+ :param pipeline_name: The name of the pipeline to start as standby.
522
+ :param wait: Set True to wait for the pipeline to start as standby.
523
+ True by default
524
+ :param timeout_s: The amount of time in seconds to wait for the
525
+ pipeline to start.
526
+ """
527
+
528
+ self._inner_start_pipeline(
529
+ pipeline_name, "standby", bootstrap_policy, wait, timeout_s
530
+ )
531
+
532
+ def resume_pipeline(
533
+ self,
534
+ pipeline_name: str,
535
+ wait: bool = True,
536
+ timeout_s: Optional[float] = None,
537
+ ):
538
+ """
539
+ Resume a pipeline
540
+
541
+ :param pipeline_name: The name of the pipeline to stop
542
+ :param wait: Set True to wait for the pipeline to pause. True by default
543
+ :param timeout_s: The amount of time in seconds to wait for the pipeline
544
+ to pause.
545
+ """
546
+
547
+ self.http.post(
548
+ path=f"/pipelines/{pipeline_name}/resume",
549
+ )
550
+
551
+ if not wait:
552
+ return
553
+
554
+ self.__wait_for_pipeline_state(pipeline_name, "running", timeout_s)
261
555
 
262
556
  def pause_pipeline(
263
557
  self,
264
558
  pipeline_name: str,
265
- error_message: str = None,
266
- timeout_s: Optional[float] = 300,
559
+ wait: bool = True,
560
+ timeout_s: Optional[float] = None,
561
+ ):
562
+ """
563
+ Pause a pipeline
564
+
565
+ :param pipeline_name: The name of the pipeline to stop
566
+ :param error_message: The error message to show if the pipeline is in
567
+ STOPPED state due to a failure.
568
+ :param wait: Set True to wait for the pipeline to pause. True by default
569
+ :param timeout_s: The amount of time in seconds to wait for the pipeline
570
+ to pause.
571
+ """
572
+
573
+ self.http.post(
574
+ path=f"/pipelines/{pipeline_name}/pause",
575
+ )
576
+
577
+ if not wait:
578
+ return
579
+
580
+ self.__wait_for_pipeline_state(pipeline_name, "paused", timeout_s)
581
+
582
+ def approve_pipeline(
583
+ self,
584
+ pipeline_name: str,
585
+ ):
586
+ self.http.post(
587
+ path=f"/pipelines/{pipeline_name}/approve",
588
+ )
589
+
590
+ def stop_pipeline(
591
+ self,
592
+ pipeline_name: str,
593
+ force: bool,
594
+ wait: bool = True,
595
+ timeout_s: Optional[float] = None,
267
596
  ):
268
597
  """
269
598
  Stop a pipeline
270
599
 
271
600
  :param pipeline_name: The name of the pipeline to stop
272
- :param error_message: The error message to show if the pipeline is in FAILED state
273
- :param timeout_s: The amount of time in seconds to wait for the pipeline to pause. 300 seconds by default.
601
+ :param force: Set True to immediately scale compute resources to zero.
602
+ Set False to automatically checkpoint before stopping.
603
+ :param wait: Set True to wait for the pipeline to stop. True by default
604
+ :param timeout_s: The amount of time in seconds to wait for the pipeline
605
+ to stop.
274
606
  """
275
607
 
276
- if timeout_s is None:
277
- timeout_s = 300
608
+ params = {"force": str(force).lower()}
278
609
 
279
610
  self.http.post(
280
- path=f"/pipelines/{pipeline_name}/pause",
611
+ path=f"/pipelines/{pipeline_name}/stop",
612
+ params=params,
281
613
  )
282
614
 
283
- if error_message is None:
284
- error_message = "Unable to PAUSE the pipeline.\n"
615
+ if not wait:
616
+ return
285
617
 
286
- start_time = time.monotonic()
618
+ start = time.monotonic()
287
619
 
288
620
  while True:
289
- if timeout_s is not None:
290
- elapsed = time.monotonic() - start_time
291
- if elapsed > timeout_s:
292
- raise TimeoutError(
293
- f"Timed out waiting for pipeline {pipeline_name} to pause"
294
- )
621
+ if timeout_s is not None and time.monotonic() - start > timeout_s:
622
+ raise FelderaTimeoutError(
623
+ f"timeout error: pipeline '{pipeline_name}' did not stop in {timeout_s} seconds"
624
+ )
295
625
 
296
- resp = self.get_pipeline(pipeline_name)
297
- status = resp.deployment_status
626
+ status = self.get_pipeline(
627
+ pipeline_name, PipelineFieldSelector.STATUS
628
+ ).deployment_status
298
629
 
299
- if status == "Paused":
300
- break
301
- elif status == "Failed":
302
- raise RuntimeError(
303
- error_message
304
- + f"""Reason: The pipeline is in a FAILED state due to the following error:
305
- {resp.deployment_error.get("message", "")}"""
306
- )
630
+ if status == "Stopped":
631
+ return
307
632
 
308
633
  logging.debug(
309
- "still pausing %s, waiting for 100 more milliseconds", pipeline_name
634
+ "still stopping %s, waiting for 100 more milliseconds",
635
+ pipeline_name,
310
636
  )
311
637
  time.sleep(0.1)
312
638
 
313
- def shutdown_pipeline(self, pipeline_name: str, timeout_s: Optional[float] = 300):
639
+ def clear_storage(self, pipeline_name: str, timeout_s: Optional[float] = None):
314
640
  """
315
- Shutdown a pipeline
641
+ Clears the storage from the pipeline.
642
+ This operation cannot be canceled.
316
643
 
317
- :param pipeline_name: The name of the pipeline to shut down
318
- :param timeout_s: The amount of time in seconds to wait for the pipeline to shut down. Default is 300 seconds.
644
+ :param pipeline_name: The name of the pipeline
645
+ :param timeout_s: The amount of time in seconds to wait for the storage
646
+ to clear.
319
647
  """
320
-
321
- if timeout_s is None:
322
- timeout_s = 300
323
-
324
648
  self.http.post(
325
- path=f"/pipelines/{pipeline_name}/shutdown",
649
+ path=f"/pipelines/{pipeline_name}/clear",
326
650
  )
327
651
 
328
652
  start = time.monotonic()
329
653
 
330
- while time.monotonic() - start < timeout_s:
331
- status = self.get_pipeline(pipeline_name).deployment_status
654
+ while True:
655
+ if timeout_s is not None and time.monotonic() - start > timeout_s:
656
+ raise FelderaTimeoutError(
657
+ f"timeout error: pipeline '{pipeline_name}' did not clear storage in {timeout_s} seconds"
658
+ )
659
+ status = self.get_pipeline(
660
+ pipeline_name, PipelineFieldSelector.STATUS
661
+ ).storage_status
332
662
 
333
- if status == "Shutdown":
663
+ if status == "Cleared":
334
664
  return
335
665
 
336
666
  logging.debug(
337
- "still shutting down %s, waiting for 100 more milliseconds",
667
+ "still clearing %s, waiting for 100 more milliseconds",
338
668
  pipeline_name,
339
669
  )
340
670
  time.sleep(0.1)
341
671
 
342
- raise FelderaTimeoutError(
343
- f"timeout error: pipeline '{pipeline_name}' did not shutdown in {timeout_s} seconds"
672
+ def start_transaction(self, pipeline_name: str) -> int:
673
+ """
674
+ Start a new transaction.
675
+
676
+ Transaction ID.
677
+
678
+ :param pipeline_name: The name of the pipeline.
679
+ """
680
+
681
+ resp = self.http.post(
682
+ path=f"/pipelines/{pipeline_name}/start_transaction",
344
683
  )
345
684
 
346
- def suspend_pipeline(self, pipeline_name: str, timeout_s: Optional[float] = 300):
685
+ return int(resp.get("transaction_id"))
686
+
687
+ def commit_transaction(
688
+ self,
689
+ pipeline_name: str,
690
+ transaction_id: Optional[int] = None,
691
+ wait: bool = True,
692
+ timeout_s: Optional[float] = None,
693
+ ):
347
694
  """
348
- Suspend a pipeline
695
+ Commits the currently active transaction.
696
+
697
+ :param pipeline_name: The name of the pipeline.
698
+
699
+ :param transaction_id: If provided, the function verifies that the currently active transaction matches this ID.
700
+ If the active transaction ID does not match, the function raises an error.
701
+
702
+ :param wait: If True, the function blocks until the transaction either commits successfully or the timeout is reached.
703
+ If False, the function initiates the commit and returns immediately without waiting for completion. The default value is True.
704
+
705
+ :param timeout_s: Maximum time (in seconds) to wait for the transaction to commit when `wait` is True.
706
+ If None, the function will wait indefinitely.
349
707
 
350
- :param pipeline_name: The name of the pipeline to suspend
351
- :param timeout_s: The amount of time in seconds to wait for the pipeline to suspend. Default is 300 seconds.
708
+ :raises RuntimeError: If there is currently no transaction in progress.
709
+ :raises ValueError: If the provided `transaction_id` does not match the current transaction.
710
+ :raises TimeoutError: If the transaction does not commit within the specified timeout (when `wait` is True).
711
+ :raises FelderaAPIError: If the pipeline fails to commit a transaction.
352
712
  """
353
713
 
354
- if timeout_s is None:
355
- timeout_s = 300
714
+ # TODO: implement this without using /stats when we have a better pipeline status reporting API.
715
+ stats = self.get_pipeline_stats(pipeline_name)
716
+ current_transaction_id = stats["global_metrics"]["transaction_id"]
717
+
718
+ if current_transaction_id == 0:
719
+ raise RuntimeError(
720
+ "Attempting to commit a transaction, but there is no transaction in progress"
721
+ )
722
+
723
+ if transaction_id and current_transaction_id != transaction_id:
724
+ raise ValueError(
725
+ f"Specified transaction id {transaction_id} doesn't match current active transaction id {current_transaction_id}"
726
+ )
727
+
728
+ transaction_id = current_transaction_id
356
729
 
357
730
  self.http.post(
358
- path=f"/pipelines/{pipeline_name}/suspend",
731
+ path=f"/pipelines/{pipeline_name}/commit_transaction",
359
732
  )
360
733
 
361
- start = time.monotonic()
734
+ start_time = time.monotonic()
362
735
 
363
- while time.monotonic() - start < timeout_s:
364
- resp = self.get_pipeline(pipeline_name)
365
- status = resp.deployment_status
736
+ if not wait:
737
+ return
366
738
 
367
- if status == "Suspended":
368
- return
369
- elif status == "Failed":
370
- raise RuntimeError(
371
- f"""Unable to Suspend pipeline '{pipeline_name}'.\nReason: The pipeline is in a FAILED state due to the following error:
372
- {resp.deployment_error.get("message", "")}"""
373
- )
739
+ while True:
740
+ if timeout_s is not None:
741
+ elapsed = time.monotonic() - start_time
742
+ if elapsed > timeout_s:
743
+ raise TimeoutError("Timed out waiting for transaction to commit")
374
744
 
375
- logging.debug(
376
- "still suspending %s, waiting for 100 more milliseconds",
377
- pipeline_name,
378
- )
379
- time.sleep(0.1)
745
+ stats = self.get_pipeline_stats(pipeline_name)
746
+ if stats["global_metrics"]["transaction_id"] != transaction_id:
747
+ return
380
748
 
381
- raise FelderaTimeoutError(
382
- f"timeout error: pipeline '{pipeline_name}' did not suspend in {timeout_s} seconds"
383
- )
749
+ logging.debug("commit hasn't completed, waiting for 1 more second")
750
+ time.sleep(1.0)
384
751
 
385
752
  def checkpoint_pipeline(self, pipeline_name: str) -> int:
386
753
  """
387
- Checkpoint a fault-tolerant pipeline
754
+ Checkpoint a pipeline.
388
755
 
389
756
  :param pipeline_name: The name of the pipeline to checkpoint
390
757
  """
@@ -434,13 +801,15 @@ Reason: The pipeline is in a FAILED state due to the following error:
434
801
  pipeline_name: str,
435
802
  table_name: str,
436
803
  format: str,
437
- data: list[list | str | dict] | dict,
804
+ data: list[list | str | dict] | dict | str,
438
805
  array: bool = False,
439
806
  force: bool = False,
440
807
  update_format: str = "raw",
441
- json_flavor: str = None,
808
+ json_flavor: Optional[str] = None,
442
809
  serialize: bool = True,
443
- ):
810
+ wait: bool = True,
811
+ wait_timeout_s: Optional[float] = None,
812
+ ) -> str:
444
813
  """
445
814
  Insert data into a pipeline
446
815
 
@@ -457,6 +826,11 @@ Reason: The pipeline is in a FAILED state due to the following error:
457
826
  "debezium_mysql", "snowflake", "kafka_connect_json_converter", "pandas"
458
827
  :param data: The data to insert
459
828
  :param serialize: If True, the data will be serialized to JSON. True by default
829
+ :param wait: If True, blocks until this input has been processed by the pipeline
830
+ :param wait_timeout_s: The timeout in seconds to wait for this set of
831
+ inputs to be processed by the pipeline. None by default
832
+
833
+ :returns: The completion token to this input.
460
834
  """
461
835
 
462
836
  if format not in ["json", "csv"]:
@@ -490,15 +864,15 @@ Reason: The pipeline is in a FAILED state due to the following error:
490
864
  _validate_no_none_keys_in_map(datum.get("insert", {}))
491
865
  _validate_no_none_keys_in_map(datum.get("delete", {}))
492
866
  else:
493
- data: dict = data
867
+ data: Mapping[str, Any] = data
494
868
  _validate_no_none_keys_in_map(data.get("insert", {}))
495
869
  _validate_no_none_keys_in_map(data.get("delete", {}))
496
870
  else:
497
871
  _validate_no_none_keys_in_map(data)
498
872
 
499
873
  # python sends `True` which isn't accepted by the backend
500
- array = _prepare_boolean_input(array)
501
- force = _prepare_boolean_input(force)
874
+ array: str = _prepare_boolean_input(array)
875
+ force: str = _prepare_boolean_input(force)
502
876
 
503
877
  params = {
504
878
  "force": force,
@@ -518,14 +892,96 @@ Reason: The pipeline is in a FAILED state due to the following error:
518
892
  content_type = "text/csv"
519
893
  data = bytes(str(data), "utf-8")
520
894
 
521
- self.http.post(
522
- path=f"/pipelines/{pipeline_name}/ingress/{table_name}",
895
+ resp = self.http.post(
896
+ path=f"/pipelines/{quote(pipeline_name, safe='')}/ingress/{quote(table_name, safe='')}",
523
897
  params=params,
524
898
  content_type=content_type,
525
899
  body=data,
526
900
  serialize=serialize,
527
901
  )
528
902
 
903
+ token = resp.get("token")
904
+ if token is None:
905
+ raise FelderaAPIError("response did not contain a completion token", resp)
906
+
907
+ if not wait:
908
+ return token
909
+
910
+ self.wait_for_token(pipeline_name, token, timeout_s=wait_timeout_s)
911
+
912
+ return token
913
+
914
def completion_token_processed(self, pipeline_name: str, token: str) -> bool:
    """
    Report whether the pipeline has finished processing every input that the
    given completion token stands for.

    :param pipeline_name: The name of the pipeline
    :param token: The completion token to look up
    :return: True when the reported status equals "complete" (compared
        case-insensitively), False for any other status
    :raises FelderaAPIError: If the response carries no ``status`` field
    """

    endpoint = f"/pipelines/{quote(pipeline_name, safe='')}/completion_status"
    response = self.http.get(path=endpoint, params={"token": token})

    state = response.get("status")
    if state is not None:
        return state.lower() == "complete"

    # Without a status there is nothing to interpret; surface the raw response.
    raise FelderaAPIError(
        f"got empty status when checking for completion status for token: {token}",
        response,
    )
942
+
943
def wait_for_token(
    self, pipeline_name: str, token: str, timeout_s: Optional[float] = None
):
    """
    Block until all records represented by this completion token have
    been processed.

    The completion status is polled repeatedly; the pause between polls
    grows exponentially (first pause 0.12s, capped at 5s) and never extends
    past the deadline when a timeout is given.

    :param pipeline_name: The name of the pipeline
    :param token: The token to check for completion
    :param timeout_s: The amount of time in seconds to wait for the pipeline
        to process these records. None (the default) waits indefinitely;
        0 performs a single check and raises if it is not yet complete.
    :raises FelderaTimeoutError: If the token has not been processed when
        the timeout expires
    """

    start = time.monotonic()
    # Compare against None explicitly: a timeout of 0 is a real deadline,
    # not a request to wait forever.
    deadline = start + timeout_s if timeout_s is not None else None
    initial_backoff = 0.1
    max_backoff = 5
    exponent = 1.2
    retries = 0

    while True:
        # Poll before looking at the clock so that even an already-expired
        # deadline gets one status check.
        if self.completion_token_processed(pipeline_name, token):
            return

        now = time.monotonic()
        if deadline is not None and now >= deadline:
            raise FelderaTimeoutError(
                f"timeout error: pipeline '{pipeline_name}' did not"
                + f" process records represented by token {token} within"
                + f" {timeout_s}"
            )

        logging.debug(
            "still waiting for inputs represented by %s to be processed; elapsed: %ss",
            token,
            now - start,
        )

        retries += 1
        backoff = min(max_backoff, initial_backoff * (exponent**retries))
        if deadline is not None:
            # Never sleep beyond the deadline; `deadline - now` is positive here.
            backoff = min(backoff, deadline - now)

        time.sleep(backoff)
984
+
529
985
  def listen_to_pipeline(
530
986
  self,
531
987
  pipeline_name: str,
@@ -534,6 +990,7 @@ Reason: The pipeline is in a FAILED state due to the following error:
534
990
  backpressure: bool = True,
535
991
  array: bool = False,
536
992
  timeout: Optional[float] = None,
993
+ case_sensitive: bool = False,
537
994
  ):
538
995
  """
539
996
  Listen for updates to views for pipeline, yields the chunks of data
@@ -549,6 +1006,7 @@ Reason: The pipeline is in a FAILED state due to the following error:
549
1006
  "json" format, the default value is False
550
1007
 
551
1008
  :param timeout: The amount of time in seconds to listen to the stream for
1009
+ :param case_sensitive: True if the table name is case sensitive or a reserved keyword, False by default
552
1010
  """
553
1011
 
554
1012
  params = {
@@ -559,21 +1017,26 @@ Reason: The pipeline is in a FAILED state due to the following error:
559
1017
  if format == "json":
560
1018
  params["array"] = _prepare_boolean_input(array)
561
1019
 
562
- resp = self.http.post(
563
- path=f"/pipelines/{pipeline_name}/egress/{table_name}",
1020
+ table_name = f'"{table_name}"' if case_sensitive else table_name
1021
+
1022
+ resp: requests.Response = self.http.post(
1023
+ path=f"/pipelines/{quote(pipeline_name, safe='')}/egress/{quote(table_name, safe='')}",
564
1024
  params=params,
565
1025
  stream=True,
566
1026
  )
567
1027
 
568
1028
  end = time.monotonic() + timeout if timeout else None
569
1029
 
570
- # Using the default chunk size below makes `iter_lines` extremely
571
- # inefficient when dealing with long lines.
572
- for chunk in resp.iter_lines(chunk_size=50000000):
573
- if end and time.monotonic() > end:
574
- break
575
- if chunk:
576
- yield json.loads(chunk, parse_float=Decimal)
1030
+ def generator():
1031
+ # Using the default chunk size below makes `iter_lines` extremely
1032
+ # inefficient when dealing with long lines.
1033
+ for chunk in resp.iter_lines(chunk_size=50000000):
1034
+ if end and time.monotonic() > end:
1035
+ break
1036
+ if chunk:
1037
+ yield json.loads(chunk, parse_float=Decimal)
1038
+
1039
+ return generator
577
1040
 
578
1041
  def query_as_text(
579
1042
  self, pipeline_name: str, query: str
@@ -602,6 +1065,27 @@ Reason: The pipeline is in a FAILED state due to the following error:
602
1065
  if chunk:
603
1066
  yield chunk.decode("utf-8")
604
1067
 
1068
def query_as_hash(self, pipeline_name: str, query: str) -> str:
    """
    Executes an ad-hoc query on the specified pipeline and returns a hash of the result.

    :param pipeline_name: The name of the pipeline to query.
    :param query: The SQL query to be executed.
    :return: A string containing the hash of the query result.
    """
    params = {
        "pipeline_name": pipeline_name,
        "sql": query,
        "format": "hash",
    }

    # Percent-encode the name (including "/") so it always stays a single
    # path segment, matching the other pipeline endpoints of this client.
    return self.http.get(
        path=f"/pipelines/{quote(pipeline_name, safe='')}/query",
        params=params,
        stream=False,
    )
1088
+
605
1089
  def query_as_parquet(self, pipeline_name: str, query: str, path: str):
606
1090
  """
607
1091
  Executes an ad-hoc query on the specified pipeline and saves the result to a parquet file.
@@ -640,7 +1124,7 @@ Reason: The pipeline is in a FAILED state due to the following error:
640
1124
 
641
1125
  def query_as_json(
642
1126
  self, pipeline_name: str, query: str
643
- ) -> Generator[dict, None, None]:
1127
+ ) -> Generator[Mapping[str, Any], None, None]:
644
1128
  """
645
1129
  Executes an ad-hoc query on the specified pipeline and returns the result as a generator that yields
646
1130
  rows of the query as Python dictionaries.
@@ -662,7 +1146,7 @@ Reason: The pipeline is in a FAILED state due to the following error:
662
1146
  stream=True,
663
1147
  )
664
1148
 
665
- for chunk in resp.iter_lines(chunk_size=50000000):
1149
+ for chunk in resp.iter_lines(chunk_size=1024):
666
1150
  if chunk:
667
1151
  yield json.loads(chunk, parse_float=Decimal)
668
1152
 
@@ -714,9 +1198,66 @@ Reason: The pipeline is in a FAILED state due to the following error:
714
1198
 
715
1199
def get_config(self) -> FelderaConfig:
    """
    Fetch the general configuration of the Feldera server.

    :return: A FelderaConfig built from the response of the ``/config`` endpoint
    """

    return FelderaConfig(self.http.get(path="/config"))
1207
+
1208
def get_pipeline_support_bundle(
    self, pipeline_name: str, params: Optional[Dict[str, Any]] = None
) -> bytes:
    """
    Generate a support bundle containing diagnostic information from a pipeline.

    This endpoint collects various diagnostic data from the pipeline including
    circuit profile, heap profile, metrics, logs, stats, and connector statistics,
    and packages them into a single ZIP file for support purposes.

    :param pipeline_name: The name of the pipeline
    :param params: Optional query parameters to control data collection
    :return: The support bundle as bytes (ZIP file)
    :raises FelderaAPIError: If the pipeline does not exist or if there's an error
    """

    resp = self.http.get(
        # Percent-encode the name so it always stays a single path segment.
        path=f"/pipelines/{quote(pipeline_name, safe='')}/support_bundle",
        params=params,
        stream=True,
    )

    # Join once at the end: growing an immutable bytes object with `+=`
    # re-copies everything received so far for every 1 KiB chunk.
    return b"".join(chunk for chunk in resp.iter_content(chunk_size=1024) if chunk)
1236
+
1237
def generate_completion_token(
    self, pipeline_name: str, table_name: str, connector_name: str
) -> str:
    """
    Generate a completion token that can be passed to :meth:`.FelderaClient.completion_token_processed` to
    check whether the pipeline has finished processing all inputs received from the connector before
    the token was generated.

    :param pipeline_name: The name of the pipeline
    :param table_name: The name of the table associated with this connector.
    :param connector_name: The name of the connector.

    :return: The completion token taken from the ``token`` field of the response.
    :raises FelderaAPIError: If the connector cannot be found, or if the pipeline is not running.
    :raises ValueError: If the response does not contain a token.
    """

    # Percent-encode every caller-supplied segment (including "/" and quotes)
    # so none of them can change the structure of the URL path.
    pipeline_seg = quote(pipeline_name, safe="")
    table_seg = quote(table_name, safe="")
    connector_seg = quote(connector_name, safe="")

    resp = self.http.get(
        path=f"/pipelines/{pipeline_seg}/tables/{table_seg}/connectors/{connector_seg}/completion_token",
    )

    token = resp.get("token")

    if token is None:
        raise ValueError(
            "got invalid response from feldera when generating completion token"
        )

    return token