feldera-0.27.0-py3-none-any.whl → feldera-0.28.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of feldera might be problematic. Click here for more details.
- feldera/__init__.py +11 -3
- feldera/_callback_runner.py +12 -11
- feldera/_helpers.py +39 -35
- feldera/enums.py +1 -1
- feldera/output_handler.py +11 -4
- feldera/pipeline.py +111 -24
- feldera/pipeline_builder.py +15 -4
- feldera/rest/__init__.py +1 -1
- feldera/rest/_httprequests.py +69 -52
- feldera/rest/config.py +5 -5
- feldera/rest/errors.py +14 -11
- feldera/rest/feldera_client.py +172 -38
- feldera/rest/pipeline.py +18 -10
- feldera/rest/sql_table.py +10 -4
- feldera/rest/sql_view.py +10 -4
- feldera/runtime_config.py +11 -12
- {feldera-0.27.0.dist-info → feldera-0.28.0.dist-info}/METADATA +14 -3
- feldera-0.28.0.dist-info/RECORD +20 -0
- {feldera-0.27.0.dist-info → feldera-0.28.0.dist-info}/WHEEL +1 -1
- feldera-0.27.0.dist-info/RECORD +0 -20
- {feldera-0.27.0.dist-info → feldera-0.28.0.dist-info}/top_level.txt +0 -0
feldera/rest/_httprequests.py
CHANGED
|
@@ -2,7 +2,11 @@ import logging
|
|
|
2
2
|
|
|
3
3
|
from feldera.rest.config import Config
|
|
4
4
|
|
|
5
|
-
from feldera.rest.errors import
|
|
5
|
+
from feldera.rest.errors import (
|
|
6
|
+
FelderaAPIError,
|
|
7
|
+
FelderaTimeoutError,
|
|
8
|
+
FelderaCommunicationError,
|
|
9
|
+
)
|
|
6
10
|
|
|
7
11
|
import json
|
|
8
12
|
import requests
|
|
@@ -16,23 +20,21 @@ def json_serialize(body: Any) -> str:
|
|
|
16
20
|
class HttpRequests:
|
|
17
21
|
def __init__(self, config: Config) -> None:
|
|
18
22
|
self.config = config
|
|
19
|
-
self.headers = {
|
|
20
|
-
"User-Agent": "feldera-python-sdk/v1"
|
|
21
|
-
}
|
|
23
|
+
self.headers = {"User-Agent": "feldera-python-sdk/v1"}
|
|
22
24
|
if self.config.api_key:
|
|
23
25
|
self.headers["Authorization"] = f"Bearer {self.config.api_key}"
|
|
24
26
|
|
|
25
27
|
def send_request(
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
28
|
+
self,
|
|
29
|
+
http_method: Callable,
|
|
30
|
+
path: str,
|
|
31
|
+
body: Optional[
|
|
32
|
+
Union[Mapping[str, Any], Sequence[Mapping[str, Any]], List[str], str]
|
|
33
|
+
] = None,
|
|
34
|
+
content_type: str = "application/json",
|
|
35
|
+
params: Optional[Mapping[str, Any]] = None,
|
|
36
|
+
stream: bool = False,
|
|
37
|
+
serialize: bool = True,
|
|
36
38
|
) -> Any:
|
|
37
39
|
"""
|
|
38
40
|
:param http_method: The HTTP method to use. Takes the equivalent `requests.*` module. (Example: `requests.get`)
|
|
@@ -53,7 +55,10 @@ class HttpRequests:
|
|
|
53
55
|
|
|
54
56
|
logging.debug(
|
|
55
57
|
"sending %s request to: %s with headers: %s, and params: %s",
|
|
56
|
-
http_method.__name__,
|
|
58
|
+
http_method.__name__,
|
|
59
|
+
request_path,
|
|
60
|
+
str(headers),
|
|
61
|
+
str(params),
|
|
57
62
|
)
|
|
58
63
|
|
|
59
64
|
if http_method.__name__ == "get":
|
|
@@ -62,6 +67,7 @@ class HttpRequests:
|
|
|
62
67
|
timeout=timeout,
|
|
63
68
|
headers=headers,
|
|
64
69
|
params=params,
|
|
70
|
+
stream=stream,
|
|
65
71
|
)
|
|
66
72
|
elif isinstance(body, bytes):
|
|
67
73
|
request = http_method(
|
|
@@ -81,9 +87,8 @@ class HttpRequests:
|
|
|
81
87
|
params=params,
|
|
82
88
|
stream=stream,
|
|
83
89
|
)
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
resp = self.__validate(request)
|
|
90
|
+
|
|
91
|
+
resp = self.__validate(request, stream=stream)
|
|
87
92
|
logging.debug("got response: %s", str(resp))
|
|
88
93
|
return resp
|
|
89
94
|
|
|
@@ -93,59 +98,63 @@ class HttpRequests:
|
|
|
93
98
|
raise FelderaCommunicationError(str(err)) from err
|
|
94
99
|
|
|
95
100
|
def get(
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
101
|
+
self,
|
|
102
|
+
path: str,
|
|
103
|
+
params: Optional[Mapping[str, Any]] = None,
|
|
104
|
+
stream: bool = False,
|
|
99
105
|
) -> Any:
|
|
100
|
-
return self.send_request(requests.get, path, params)
|
|
106
|
+
return self.send_request(requests.get, path, params=params, stream=stream)
|
|
101
107
|
|
|
102
108
|
def post(
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
109
|
+
self,
|
|
110
|
+
path: str,
|
|
111
|
+
body: Optional[
|
|
112
|
+
Union[Mapping[str, Any], Sequence[Mapping[str, Any]], List[str], str]
|
|
113
|
+
] = None,
|
|
114
|
+
content_type: Optional[str] = "application/json",
|
|
115
|
+
params: Optional[Mapping[str, Any]] = None,
|
|
116
|
+
stream: bool = False,
|
|
117
|
+
serialize: bool = True,
|
|
112
118
|
) -> Any:
|
|
113
119
|
return self.send_request(
|
|
114
120
|
requests.post,
|
|
115
121
|
path,
|
|
116
122
|
body,
|
|
117
123
|
content_type,
|
|
118
|
-
params,
|
|
119
|
-
|
|
124
|
+
params,
|
|
125
|
+
stream=stream,
|
|
126
|
+
serialize=serialize,
|
|
120
127
|
)
|
|
121
128
|
|
|
122
129
|
def patch(
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
+
self,
|
|
131
|
+
path: str,
|
|
132
|
+
body: Optional[
|
|
133
|
+
Union[Mapping[str, Any], Sequence[Mapping[str, Any]], List[str], str]
|
|
134
|
+
] = None,
|
|
135
|
+
content_type: Optional[str] = "application/json",
|
|
136
|
+
params: Optional[Mapping[str, Any]] = None,
|
|
130
137
|
) -> Any:
|
|
131
138
|
return self.send_request(requests.patch, path, body, content_type, params)
|
|
132
139
|
|
|
133
140
|
def put(
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
+
self,
|
|
142
|
+
path: str,
|
|
143
|
+
body: Optional[
|
|
144
|
+
Union[Mapping[str, Any], Sequence[Mapping[str, Any]], List[str], str]
|
|
145
|
+
] = None,
|
|
146
|
+
content_type: Optional[str] = "application/json",
|
|
147
|
+
params: Optional[Mapping[str, Any]] = None,
|
|
141
148
|
) -> Any:
|
|
142
149
|
return self.send_request(requests.put, path, body, content_type, params)
|
|
143
150
|
|
|
144
151
|
def delete(
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
152
|
+
self,
|
|
153
|
+
path: str,
|
|
154
|
+
body: Optional[
|
|
155
|
+
Union[Mapping[str, Any], Sequence[Mapping[str, Any]], List[str]]
|
|
156
|
+
] = None,
|
|
157
|
+
params: Optional[Mapping[str, Any]] = None,
|
|
149
158
|
) -> Any:
|
|
150
159
|
return self.send_request(requests.delete, path, body, params=params)
|
|
151
160
|
|
|
@@ -156,9 +165,17 @@ class HttpRequests:
|
|
|
156
165
|
return request.json()
|
|
157
166
|
|
|
158
167
|
@staticmethod
|
|
159
|
-
def __validate(request: requests.Response) -> Any:
|
|
168
|
+
def __validate(request: requests.Response, stream=False) -> Any:
|
|
160
169
|
try:
|
|
161
170
|
request.raise_for_status()
|
|
171
|
+
|
|
172
|
+
if stream:
|
|
173
|
+
return request
|
|
174
|
+
if request.headers.get("content-type") == "text/plain":
|
|
175
|
+
return request.text
|
|
176
|
+
elif request.headers.get("content-type") == "application/octet-stream":
|
|
177
|
+
return request.content
|
|
178
|
+
|
|
162
179
|
resp = HttpRequests.__to_json(request)
|
|
163
180
|
return resp
|
|
164
181
|
except requests.exceptions.HTTPError as err:
|
feldera/rest/config.py
CHANGED
|
@@ -7,11 +7,11 @@ class Config:
|
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
def __init__(
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
10
|
+
self,
|
|
11
|
+
url: str,
|
|
12
|
+
api_key: Optional[str] = None,
|
|
13
|
+
version: Optional[str] = None,
|
|
14
|
+
timeout: Optional[float] = None,
|
|
15
15
|
) -> None:
|
|
16
16
|
"""
|
|
17
17
|
:param url: The url to the Feldera API (ex: https://try.feldera.com)
|
feldera/rest/errors.py
CHANGED
|
@@ -25,31 +25,34 @@ class FelderaAPIError(FelderaError):
|
|
|
25
25
|
self.message = None
|
|
26
26
|
self.details = None
|
|
27
27
|
|
|
28
|
+
err_msg = ""
|
|
29
|
+
|
|
28
30
|
if request.text:
|
|
29
31
|
try:
|
|
30
32
|
json_data = json.loads(request.text)
|
|
33
|
+
|
|
34
|
+
self.error_code = json_data.get("error_code")
|
|
35
|
+
if self.error_code:
|
|
36
|
+
err_msg += f"\nError Code: {self.error_code}"
|
|
31
37
|
self.message = json_data.get("message")
|
|
38
|
+
if self.message:
|
|
39
|
+
err_msg += f"\nMessage: {self.message}"
|
|
32
40
|
self.details = json_data.get("details")
|
|
33
|
-
|
|
34
|
-
except:
|
|
41
|
+
except Exception:
|
|
35
42
|
self.message = request.text
|
|
36
43
|
|
|
37
|
-
|
|
38
|
-
if self.error_code:
|
|
39
|
-
return f"FelderaAPIError: {self.error}\n Error code: {self.error_code}\n Error message: {self.message}\n Details: {self.details}"
|
|
40
|
-
else:
|
|
41
|
-
return f"FelderaAPIError: {self.error}\n {self.message}"
|
|
44
|
+
super().__init__(err_msg)
|
|
42
45
|
|
|
43
46
|
|
|
44
47
|
class FelderaTimeoutError(FelderaError):
|
|
45
48
|
"""Error when Feldera operation takes longer than expected"""
|
|
46
49
|
|
|
47
|
-
def
|
|
48
|
-
|
|
50
|
+
def __init__(self, err: str) -> None:
|
|
51
|
+
super().__init__(f"Timeout connecting to Feldera: {err}")
|
|
49
52
|
|
|
50
53
|
|
|
51
54
|
class FelderaCommunicationError(FelderaError):
|
|
52
55
|
"""Error when connection to Feldera"""
|
|
53
56
|
|
|
54
|
-
def
|
|
55
|
-
|
|
57
|
+
def __init__(self, err: str) -> None:
|
|
58
|
+
super().__init__(f"Cannot connect to Feldera API: {err}")
|
feldera/rest/feldera_client.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
+
import pathlib
|
|
1
2
|
from typing import Optional
|
|
2
3
|
import logging
|
|
3
4
|
import time
|
|
4
5
|
import json
|
|
5
6
|
from decimal import Decimal
|
|
7
|
+
from typing import Generator
|
|
6
8
|
|
|
7
9
|
from feldera.rest.config import Config
|
|
8
10
|
from feldera.rest.pipeline import Pipeline
|
|
@@ -22,10 +24,10 @@ class FelderaClient:
|
|
|
22
24
|
"""
|
|
23
25
|
|
|
24
26
|
def __init__(
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
27
|
+
self,
|
|
28
|
+
url: str,
|
|
29
|
+
api_key: Optional[str] = None,
|
|
30
|
+
timeout: Optional[int] = None,
|
|
29
31
|
) -> None:
|
|
30
32
|
"""
|
|
31
33
|
:param url: The url to Feldera API (ex: https://try.feldera.com)
|
|
@@ -86,7 +88,18 @@ class FelderaClient:
|
|
|
86
88
|
if status == "Success":
|
|
87
89
|
return p
|
|
88
90
|
elif status not in wait:
|
|
89
|
-
#
|
|
91
|
+
# error handling for SQL compilation errors
|
|
92
|
+
if isinstance(status, dict):
|
|
93
|
+
sql_errors = status.get("SqlError")
|
|
94
|
+
if sql_errors:
|
|
95
|
+
err_msg = f"Pipeline {name} failed to compile:\n"
|
|
96
|
+
for sql_error in sql_errors:
|
|
97
|
+
err_msg += (
|
|
98
|
+
f"{sql_error['error_type']}\n{sql_error['message']}\n"
|
|
99
|
+
)
|
|
100
|
+
err_msg += f"Code snippet:\n{sql_error['snippet']}"
|
|
101
|
+
raise RuntimeError(err_msg)
|
|
102
|
+
|
|
90
103
|
raise RuntimeError(f"The program failed to compile: {status}")
|
|
91
104
|
|
|
92
105
|
logging.debug("still compiling %s, waiting for 100 more milliseconds", name)
|
|
@@ -103,13 +116,15 @@ class FelderaClient:
|
|
|
103
116
|
body = {
|
|
104
117
|
"name": pipeline.name,
|
|
105
118
|
"program_code": pipeline.program_code,
|
|
119
|
+
"udf_rust": pipeline.udf_rust,
|
|
120
|
+
"udf_toml": pipeline.udf_toml,
|
|
106
121
|
"program_config": pipeline.program_config,
|
|
107
122
|
"runtime_config": pipeline.runtime_config,
|
|
108
123
|
"description": pipeline.description or "",
|
|
109
124
|
}
|
|
110
125
|
|
|
111
126
|
self.http.post(
|
|
112
|
-
path=
|
|
127
|
+
path="/pipelines",
|
|
113
128
|
body=body,
|
|
114
129
|
)
|
|
115
130
|
|
|
@@ -123,6 +138,8 @@ class FelderaClient:
|
|
|
123
138
|
body = {
|
|
124
139
|
"name": pipeline.name,
|
|
125
140
|
"program_code": pipeline.program_code,
|
|
141
|
+
"udf_rust": pipeline.udf_rust,
|
|
142
|
+
"udf_toml": pipeline.udf_toml,
|
|
126
143
|
"program_config": pipeline.program_config,
|
|
127
144
|
"runtime_config": pipeline.runtime_config,
|
|
128
145
|
"description": pipeline.description or "",
|
|
@@ -154,7 +171,7 @@ class FelderaClient:
|
|
|
154
171
|
|
|
155
172
|
:param name: The name of the pipeline
|
|
156
173
|
"""
|
|
157
|
-
|
|
174
|
+
self.http.delete(
|
|
158
175
|
path=f"/pipelines/{name}",
|
|
159
176
|
)
|
|
160
177
|
|
|
@@ -188,9 +205,11 @@ class FelderaClient:
|
|
|
188
205
|
if status == "Running":
|
|
189
206
|
break
|
|
190
207
|
elif status == "Failed":
|
|
191
|
-
raise RuntimeError(
|
|
208
|
+
raise RuntimeError("Failed to start pipeline")
|
|
192
209
|
|
|
193
|
-
logging.debug(
|
|
210
|
+
logging.debug(
|
|
211
|
+
"still starting %s, waiting for 100 more milliseconds", pipeline_name
|
|
212
|
+
)
|
|
194
213
|
time.sleep(0.1)
|
|
195
214
|
|
|
196
215
|
def pause_pipeline(self, pipeline_name: str):
|
|
@@ -209,17 +228,18 @@ class FelderaClient:
|
|
|
209
228
|
if status == "Paused":
|
|
210
229
|
break
|
|
211
230
|
elif status == "Failed":
|
|
212
|
-
|
|
213
|
-
raise RuntimeError(f"Failed to pause pipeline")
|
|
231
|
+
raise RuntimeError("Failed to pause pipeline")
|
|
214
232
|
|
|
215
|
-
logging.debug(
|
|
233
|
+
logging.debug(
|
|
234
|
+
"still pausing %s, waiting for 100 more milliseconds", pipeline_name
|
|
235
|
+
)
|
|
216
236
|
time.sleep(0.1)
|
|
217
237
|
|
|
218
238
|
def shutdown_pipeline(self, pipeline_name: str):
|
|
219
239
|
"""
|
|
220
240
|
Shutdown a pipeline
|
|
221
241
|
|
|
222
|
-
:param pipeline_name: The name of the pipeline to
|
|
242
|
+
:param pipeline_name: The name of the pipeline to shut down
|
|
223
243
|
"""
|
|
224
244
|
|
|
225
245
|
self.http.post(
|
|
@@ -235,11 +255,16 @@ class FelderaClient:
|
|
|
235
255
|
if status == "Shutdown":
|
|
236
256
|
return
|
|
237
257
|
|
|
238
|
-
logging.debug(
|
|
258
|
+
logging.debug(
|
|
259
|
+
"still shutting down %s, waiting for 100 more milliseconds",
|
|
260
|
+
pipeline_name,
|
|
261
|
+
)
|
|
239
262
|
time.sleep(0.1)
|
|
240
263
|
|
|
241
264
|
# retry sending shutdown request as the pipline hasn't shutdown yet
|
|
242
|
-
logging.debug(
|
|
265
|
+
logging.debug(
|
|
266
|
+
"pipeline %s hasn't shutdown after %s s, retrying", pipeline_name, timeout
|
|
267
|
+
)
|
|
243
268
|
self.http.post(
|
|
244
269
|
path=f"/pipelines/{pipeline_name}/shutdown",
|
|
245
270
|
)
|
|
@@ -253,22 +278,25 @@ class FelderaClient:
|
|
|
253
278
|
if status == "Shutdown":
|
|
254
279
|
return
|
|
255
280
|
|
|
256
|
-
logging.debug(
|
|
281
|
+
logging.debug(
|
|
282
|
+
"still shutting down %s, waiting for 100 more milliseconds",
|
|
283
|
+
pipeline_name,
|
|
284
|
+
)
|
|
257
285
|
time.sleep(0.1)
|
|
258
286
|
|
|
259
287
|
raise RuntimeError(f"Failed to shutdown pipeline {pipeline_name}")
|
|
260
288
|
|
|
261
289
|
def push_to_pipeline(
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
290
|
+
self,
|
|
291
|
+
pipeline_name: str,
|
|
292
|
+
table_name: str,
|
|
293
|
+
format: str,
|
|
294
|
+
data: list[list | str | dict],
|
|
295
|
+
array: bool = False,
|
|
296
|
+
force: bool = False,
|
|
297
|
+
update_format: str = "raw",
|
|
298
|
+
json_flavor: str = None,
|
|
299
|
+
serialize: bool = True,
|
|
272
300
|
):
|
|
273
301
|
"""
|
|
274
302
|
Insert data into a pipeline
|
|
@@ -291,11 +319,27 @@ class FelderaClient:
|
|
|
291
319
|
if format not in ["json", "csv"]:
|
|
292
320
|
raise ValueError("format must be either 'json' or 'csv'")
|
|
293
321
|
|
|
294
|
-
if update_format not in [
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
322
|
+
if update_format not in [
|
|
323
|
+
"insert_delete",
|
|
324
|
+
"weighted",
|
|
325
|
+
"debezium",
|
|
326
|
+
"snowflake",
|
|
327
|
+
"raw",
|
|
328
|
+
]:
|
|
329
|
+
raise ValueError(
|
|
330
|
+
"update_format must be one of 'insert_delete', 'weighted', 'debezium', 'snowflake', 'raw'"
|
|
331
|
+
)
|
|
332
|
+
|
|
333
|
+
if json_flavor is not None and json_flavor not in [
|
|
334
|
+
"default",
|
|
335
|
+
"debezium_mysql",
|
|
336
|
+
"snowflake",
|
|
337
|
+
"kafka_connect_json_converter",
|
|
338
|
+
"pandas",
|
|
339
|
+
]:
|
|
340
|
+
raise ValueError(
|
|
341
|
+
"json_flavor must be one of 'default', 'debezium_mysql', 'snowflake', 'kafka_connect_json_converter', 'pandas'"
|
|
342
|
+
)
|
|
299
343
|
|
|
300
344
|
# python sends `True` which isn't accepted by the backend
|
|
301
345
|
array = _prepare_boolean_input(array)
|
|
@@ -328,13 +372,13 @@ class FelderaClient:
|
|
|
328
372
|
)
|
|
329
373
|
|
|
330
374
|
def listen_to_pipeline(
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
375
|
+
self,
|
|
376
|
+
pipeline_name: str,
|
|
377
|
+
table_name: str,
|
|
378
|
+
format: str,
|
|
379
|
+
backpressure: bool = True,
|
|
380
|
+
array: bool = False,
|
|
381
|
+
timeout: Optional[float] = None,
|
|
338
382
|
):
|
|
339
383
|
"""
|
|
340
384
|
Listen for updates to views for pipeline, yields the chunks of data
|
|
@@ -375,3 +419,93 @@ class FelderaClient:
|
|
|
375
419
|
break
|
|
376
420
|
if chunk:
|
|
377
421
|
yield json.loads(chunk, parse_float=Decimal)
|
|
422
|
+
|
|
423
|
+
def query_as_text(
|
|
424
|
+
self, pipeline_name: str, query: str
|
|
425
|
+
) -> Generator[str, None, None]:
|
|
426
|
+
"""
|
|
427
|
+
Executes an ad-hoc query on the specified pipeline and returns a generator that yields lines of the table.
|
|
428
|
+
|
|
429
|
+
:param pipeline_name: The name of the pipeline to query.
|
|
430
|
+
:param query: The SQL query to be executed.
|
|
431
|
+
:return: A generator yielding the query result in tabular format, one line at a time.
|
|
432
|
+
"""
|
|
433
|
+
params = {
|
|
434
|
+
"pipeline_name": pipeline_name,
|
|
435
|
+
"sql": query,
|
|
436
|
+
"format": "text",
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
resp = self.http.get(
|
|
440
|
+
path=f"/pipelines/{pipeline_name}/query",
|
|
441
|
+
params=params,
|
|
442
|
+
stream=True,
|
|
443
|
+
)
|
|
444
|
+
|
|
445
|
+
chunk: bytes
|
|
446
|
+
for chunk in resp.iter_lines(chunk_size=50000000):
|
|
447
|
+
if chunk:
|
|
448
|
+
yield chunk.decode("utf-8")
|
|
449
|
+
|
|
450
|
+
def query_as_parquet(self, pipeline_name: str, query: str, path: str):
|
|
451
|
+
"""
|
|
452
|
+
Executes an ad-hoc query on the specified pipeline and saves the result to a parquet file.
|
|
453
|
+
If the extension isn't `parquet`, it will be automatically appended to `path`.
|
|
454
|
+
|
|
455
|
+
:param pipeline_name: The name of the pipeline to query.
|
|
456
|
+
:param query: The SQL query to be executed.
|
|
457
|
+
:param path: The path including the file name to save the resulting parquet file in.
|
|
458
|
+
"""
|
|
459
|
+
|
|
460
|
+
params = {
|
|
461
|
+
"pipeline_name": pipeline_name,
|
|
462
|
+
"sql": query,
|
|
463
|
+
"format": "parquet",
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
resp = self.http.get(
|
|
467
|
+
path=f"/pipelines/{pipeline_name}/query",
|
|
468
|
+
params=params,
|
|
469
|
+
stream=True,
|
|
470
|
+
)
|
|
471
|
+
|
|
472
|
+
path: pathlib.Path = pathlib.Path(path)
|
|
473
|
+
|
|
474
|
+
ext = ".parquet"
|
|
475
|
+
if path.suffix != ext:
|
|
476
|
+
path = path.with_suffix(ext)
|
|
477
|
+
|
|
478
|
+
file = open(path, "wb")
|
|
479
|
+
|
|
480
|
+
chunk: bytes
|
|
481
|
+
for chunk in resp.iter_content(chunk_size=1024):
|
|
482
|
+
if chunk:
|
|
483
|
+
file.write(chunk)
|
|
484
|
+
file.close()
|
|
485
|
+
|
|
486
|
+
def query_as_json(
|
|
487
|
+
self, pipeline_name: str, query: str
|
|
488
|
+
) -> Generator[dict, None, None]:
|
|
489
|
+
"""
|
|
490
|
+
Executes an ad-hoc query on the specified pipeline and returns the result as a generator that yields
|
|
491
|
+
rows of the query as Python dictionaries.
|
|
492
|
+
|
|
493
|
+
:param pipeline_name: The name of the pipeline to query.
|
|
494
|
+
:param query: The SQL query to be executed.
|
|
495
|
+
:return: A generator that yields each row of the result as a Python dictionary, deserialized from JSON.
|
|
496
|
+
"""
|
|
497
|
+
params = {
|
|
498
|
+
"pipeline_name": pipeline_name,
|
|
499
|
+
"sql": query,
|
|
500
|
+
"format": "json",
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
resp = self.http.get(
|
|
504
|
+
path=f"/pipelines/{pipeline_name}/query",
|
|
505
|
+
params=params,
|
|
506
|
+
stream=True,
|
|
507
|
+
)
|
|
508
|
+
|
|
509
|
+
for chunk in resp.iter_lines(chunk_size=50000000):
|
|
510
|
+
if chunk:
|
|
511
|
+
yield json.loads(chunk, parse_float=Decimal)
|
feldera/rest/pipeline.py
CHANGED
|
@@ -9,18 +9,22 @@ class Pipeline:
|
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
11
|
def __init__(
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
12
|
+
self,
|
|
13
|
+
name: str,
|
|
14
|
+
sql: str,
|
|
15
|
+
udf_rust: str,
|
|
16
|
+
udf_toml: str,
|
|
17
|
+
program_config: Mapping[str, Any],
|
|
18
|
+
runtime_config: Mapping[str, Any],
|
|
19
|
+
description: Optional[str] = None,
|
|
18
20
|
):
|
|
19
21
|
"""
|
|
20
22
|
Initializes a new pipeline
|
|
21
23
|
|
|
22
24
|
:param name: The name of the pipeline
|
|
23
25
|
:param sql: The SQL code of the pipeline
|
|
26
|
+
:param udf_rust: Rust code for UDFs
|
|
27
|
+
:param udf_toml: Rust dependencies required by UDFs (in the TOML format)
|
|
24
28
|
:param program_config: The program config of the pipeline
|
|
25
29
|
:param runtime_config: The configuration of the pipeline
|
|
26
30
|
:param description: Optional. The description of the pipeline
|
|
@@ -28,6 +32,8 @@ class Pipeline:
|
|
|
28
32
|
|
|
29
33
|
self.name: str = name
|
|
30
34
|
self.program_code: str = sql.strip()
|
|
35
|
+
self.udf_rust: str = udf_rust
|
|
36
|
+
self.udf_toml: str = udf_toml
|
|
31
37
|
self.description: Optional[str] = description
|
|
32
38
|
self.program_config: Mapping[str, Any] = program_config
|
|
33
39
|
self.runtime_config: Mapping[str, Any] = runtime_config
|
|
@@ -44,13 +50,15 @@ class Pipeline:
|
|
|
44
50
|
self.deployment_error: Optional[dict] = None
|
|
45
51
|
self.deployment_location: Optional[str] = None
|
|
46
52
|
self.program_binary_url: Optional[str] = None
|
|
47
|
-
self.program_info: Optional[dict] =
|
|
53
|
+
self.program_info: Optional[dict] = (
|
|
54
|
+
None # info about input & output connectors and the schema
|
|
55
|
+
)
|
|
48
56
|
self.program_status: Optional[str] = None
|
|
49
57
|
self.program_status_since: Optional[str] = None
|
|
50
58
|
|
|
51
59
|
@classmethod
|
|
52
60
|
def from_dict(cls, d: Mapping[str, Any]):
|
|
53
|
-
pipeline = cls("", "", {}, {})
|
|
61
|
+
pipeline = cls("", "", "", "", {}, {})
|
|
54
62
|
pipeline.__dict__ = d
|
|
55
63
|
pipeline.tables = []
|
|
56
64
|
pipeline.views = []
|
|
@@ -58,11 +66,11 @@ class Pipeline:
|
|
|
58
66
|
info = d.get("program_info")
|
|
59
67
|
|
|
60
68
|
if info is not None:
|
|
61
|
-
for i in info[
|
|
69
|
+
for i in info["schema"]["inputs"]:
|
|
62
70
|
tbl = SQLTable.from_dict(i)
|
|
63
71
|
pipeline.tables.append(tbl)
|
|
64
72
|
|
|
65
|
-
for output in info[
|
|
73
|
+
for output in info["schema"]["outputs"]:
|
|
66
74
|
v = SQLView.from_dict(output)
|
|
67
75
|
pipeline.views.append(v)
|
|
68
76
|
|
feldera/rest/sql_table.py
CHANGED
|
@@ -3,7 +3,13 @@ class SQLTable:
|
|
|
3
3
|
Represents a SQL table in Feldera
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
-
def __init__(
|
|
6
|
+
def __init__(
|
|
7
|
+
self,
|
|
8
|
+
name: str,
|
|
9
|
+
fields: list[dict],
|
|
10
|
+
case_sensitive: bool = False,
|
|
11
|
+
materialized: bool = False,
|
|
12
|
+
):
|
|
7
13
|
self.name = name
|
|
8
14
|
self.case_sensitive = case_sensitive
|
|
9
15
|
self.materialized = materialized
|
|
@@ -11,7 +17,7 @@ class SQLTable:
|
|
|
11
17
|
|
|
12
18
|
@classmethod
|
|
13
19
|
def from_dict(self, table_dict: dict):
|
|
14
|
-
tbl = SQLTable(name=table_dict[
|
|
15
|
-
tbl.case_sensitive = table_dict[
|
|
16
|
-
tbl.materialized = table_dict[
|
|
20
|
+
tbl = SQLTable(name=table_dict["name"], fields=table_dict["fields"])
|
|
21
|
+
tbl.case_sensitive = table_dict["case_sensitive"]
|
|
22
|
+
tbl.materialized = table_dict["materialized"]
|
|
17
23
|
return tbl
|