confluent-sql 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- confluent_sql/__init__.py +64 -0
- confluent_sql/__version__.py +10 -0
- confluent_sql/changelog_compressor.py +603 -0
- confluent_sql/connection.py +1007 -0
- confluent_sql/cursor.py +804 -0
- confluent_sql/exceptions.py +209 -0
- confluent_sql/execution_mode.py +34 -0
- confluent_sql/result_readers.py +663 -0
- confluent_sql/statement.py +566 -0
- confluent_sql/types.py +1606 -0
- confluent_sql-0.1.0.dist-info/METADATA +214 -0
- confluent_sql-0.1.0.dist-info/RECORD +14 -0
- confluent_sql-0.1.0.dist-info/WHEEL +4 -0
- confluent_sql-0.1.0.dist-info/licenses/LICENSE.txt +203 -0
confluent_sql/types.py
ADDED
|
@@ -0,0 +1,1606 @@
|
|
|
1
|
+
"""Type conversions between Flink statement API string serializations and python representations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import re
|
|
7
|
+
from collections import Counter
|
|
8
|
+
from collections.abc import Iterable
|
|
9
|
+
from dataclasses import dataclass, fields, is_dataclass
|
|
10
|
+
from datetime import date, datetime, time, timedelta, timezone
|
|
11
|
+
from decimal import Decimal
|
|
12
|
+
from math import isinf, isnan
|
|
13
|
+
from types import NoneType
|
|
14
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Generic, Protocol, TypeAlias, TypeVar
|
|
15
|
+
|
|
16
|
+
from confluent_sql.exceptions import InterfaceError, TypeMismatchError
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from .connection import Connection
|
|
20
|
+
|
|
21
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Generic parameters used by TypeConverter[PyType, ResponseType].
PyType = TypeVar("PyType")
"""The data type of the Python value being converted to/from Flink SQL representation by
a TypeConverter subclass."""
ResponseType = TypeVar("ResponseType")
"""The data type of the from-response-API-JSON-encoded value being converted from
in to_python_value()."""


# Imported for static type checking only; never executed at runtime.
if TYPE_CHECKING:
    from .statement import Schema

# Public names of this module. Several of them are not defined in the part of
# the file closest to this list; they are expected further down the module.
__all__ = [
    "ColumnTypeDefinition",
    "StrAnyDict",
    "StatementTypeConverter",
    "TypeConverter",
    "convert_statement_parameters",
    "SqlNone",
    "YearMonthInterval",
    "TypeMismatchError",
]

# NOTE(review): the bare string below is a no-op expression statement and its
# sentence looks truncated ("driven by the schema information ...") -- confirm
# the intended wording or fold it into the module docstring.
"""
Type conversion between the SQL results API and Python values, driven by the schema information
"""


# Scalar values arrive from the results API as strings, booleans, or null.
FromResponseScalarTypes: TypeAlias = str | bool | None
"""Describes all possible scalar encoding types returned from from-response API calls."""

# Row types are fully recursive and come to us in JSON as a nested list.
FromResponseTypes: TypeAlias = FromResponseScalarTypes | list["FromResponseTypes"]
"""
Describes all possible encoding types returned from from-response API calls, including
nested row types.
"""

# Shorthand for a JSON object decoded into a Python dict with string keys.
StrAnyDict: TypeAlias = dict[str, Any]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@dataclass
class RowColumn:
    """One member of a ROW-typed column.

    Corresponds to an entry of statement.traits.schema.columns[].type.fields.
    It would be the same shape as Column, except that the API carries the type
    information under the key "field_type" instead.
    """

    name: str
    field_type: ColumnTypeDefinition
    description: str | None = None

    @property
    def type(self) -> ColumnTypeDefinition:
        """Expose field_type under the name Column uses, hiding the API inconsistency."""
        return self.field_type

    @classmethod
    def from_response(cls, data: StrAnyDict) -> RowColumn:
        """Build a RowColumn from one JSON "fields" entry of a ROW type description.

        Requires the keys "field_type" and "name"; "description" is optional.
        """
        # Parse the nested type first so a malformed type description surfaces
        # before anything else is looked at.
        parsed_type = ColumnTypeDefinition.from_response(data["field_type"])
        return cls(data["name"], parsed_type, data.get("description"))
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclass(kw_only=True)
|
|
87
|
+
class ColumnTypeDefinition:
|
|
88
|
+
"""Fields corresponding to statement.traits.schema.columns[].type members.
|
|
89
|
+
|
|
90
|
+
Describes the Flink-side type definition of a projected column.
|
|
91
|
+
"""
|
|
92
|
+
|
|
93
|
+
type: str
|
|
94
|
+
"""Flink name of the type, e.g., "INT", "STRING", "ROW", etc."""
|
|
95
|
+
nullable: bool
|
|
96
|
+
length: int | None = None
|
|
97
|
+
precision: int | None = None
|
|
98
|
+
scale: int | None = None
|
|
99
|
+
fractional_precision: int | None = None # if an interval type
|
|
100
|
+
resolution: str | None = None # if an interval type
|
|
101
|
+
key_type: ColumnTypeDefinition | None = None # if type == "MAP"
|
|
102
|
+
value_type: ColumnTypeDefinition | None = None # if type == "MAP"
|
|
103
|
+
element_type: ColumnTypeDefinition | None = None # if type == "ARRAY" or "MULTISET"
|
|
104
|
+
|
|
105
|
+
fields: list[RowColumn] | None = None
|
|
106
|
+
"""The interior fields of a ROW type, if applicable."""
|
|
107
|
+
|
|
108
|
+
class_name: str | None = None
|
|
109
|
+
"""The Flink-side class name of the structured data type (if applicable)."""
|
|
110
|
+
|
|
111
|
+
@property
|
|
112
|
+
def type_name(self) -> str:
|
|
113
|
+
"""Return the Flink type name. Aliasing for clarity."""
|
|
114
|
+
return self.type
|
|
115
|
+
|
|
116
|
+
@classmethod
|
|
117
|
+
def from_response(cls, data: StrAnyDict) -> ColumnTypeDefinition:
|
|
118
|
+
"""Create a ColumnTypeDefinition from JSON response data within from-API statement traits"""
|
|
119
|
+
|
|
120
|
+
element_type = key_type = value_type = None
|
|
121
|
+
|
|
122
|
+
column_type = data["type"]
|
|
123
|
+
|
|
124
|
+
if column_type in {"ARRAY", "MULTISET"}:
|
|
125
|
+
element_type = data.get("element_type")
|
|
126
|
+
if element_type is not None:
|
|
127
|
+
# Describes the element type of an ARRAY or a MULTISET.
|
|
128
|
+
# Promote from element type dict to a ColumnTypeDefinition
|
|
129
|
+
element_type = cls.from_response(data["element_type"])
|
|
130
|
+
|
|
131
|
+
elif column_type == "MAP":
|
|
132
|
+
# For MAP types, we need to parse key_type and value_type specially.
|
|
133
|
+
key_type = cls.from_response(data["key_type"])
|
|
134
|
+
value_type = cls.from_response(data["value_type"])
|
|
135
|
+
|
|
136
|
+
return cls(
|
|
137
|
+
type=column_type,
|
|
138
|
+
nullable=data["nullable"],
|
|
139
|
+
length=data.get("length"),
|
|
140
|
+
precision=data.get("precision"),
|
|
141
|
+
scale=data.get("scale"),
|
|
142
|
+
fractional_precision=data.get("fractional_precision"),
|
|
143
|
+
resolution=data.get("resolution"),
|
|
144
|
+
key_type=key_type,
|
|
145
|
+
value_type=value_type,
|
|
146
|
+
element_type=element_type,
|
|
147
|
+
fields=[RowColumn.from_response(field) for field in data.get("fields", [])]
|
|
148
|
+
if "fields" in data
|
|
149
|
+
else None,
|
|
150
|
+
class_name=data.get("class_name"),
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class StatementTypeConverter:
|
|
155
|
+
"""
|
|
156
|
+
Acts on behalf of a statement's Schema to convert from-API-JSON-changelog values to Python,
|
|
157
|
+
values. Drives per-column TypeConverter deserialization to python types based on the schema.
|
|
158
|
+
"""
|
|
159
|
+
|
|
160
|
+
_schema: Schema
|
|
161
|
+
_type_converters: list[TypeConverter]
|
|
162
|
+
|
|
163
|
+
def __init__(self, connection: Connection, schema: Schema):
|
|
164
|
+
self._schema = schema
|
|
165
|
+
self._type_converters = [
|
|
166
|
+
get_api_type_converter(connection, col.type) for col in schema.columns
|
|
167
|
+
]
|
|
168
|
+
|
|
169
|
+
def to_python_row(self, sql_row: list[FromResponseTypes]) -> tuple[SupportedPythonTypes, ...]:
|
|
170
|
+
"""Convert a SQL row (list of from-results-API encoded values) to a Python row
|
|
171
|
+
(tuple of Python values) to be returned by a Cursor."""
|
|
172
|
+
return tuple(
|
|
173
|
+
converter.to_python_value(sql_value) # type: ignore[arg-type]
|
|
174
|
+
for converter, sql_value in zip(self._type_converters, sql_row, strict=True)
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
class TypeConverter(Generic[PyType, ResponseType]):
    """Common base class of every Flink <-> Python data type converter.

    Each TypeConverter translates between how one Flink SQL type is represented
    in statement API JSON responses and the matching Python type.

    The instance method `to_python_value()` performs the Flink -> Python
    direction: it receives a from-response-API-JSON-encoded value and returns
    the Python value. It may consult the ColumnTypeDefinition supplied at
    construction time (taken from the statement's schema) for further detail
    about the Flink-side type.

    Generic parameter PyType is the Python type this converter deals in: the
    return type of to_python_value() (besides None, for nullable columns) and
    the parameter type of to_statement_string().

    Generic parameter ResponseType is the from-response-API-JSON-encoded type
    this converter deals in: the parameter type of to_python_value() (besides
    None, for nullable columns).
    """

    PRIMARY_FLINK_TYPE_NAME: str
    """The primary Flink SQL type name that this TypeConverter handles."""

    _column_type: ColumnTypeDefinition

    def __init__(self, connection: Connection, column_type: ColumnTypeDefinition):
        """Remember the owning connection and the schema type description."""
        self._column_type = column_type
        self._connection = connection

    def to_python_value(self, response_value: ResponseType | None) -> PyType | None:
        """Translate a statement-response-API-JSON value into its Python value.

        Any column may be nullable, in which case None should be returned.
        """
        raise NotImplementedError("Subclasses should implement this method.")  # pragma: no cover

    @classmethod
    def to_statement_string(cls, python_value: PyType) -> str:
        """Render a Python value as text suitable for interpolation into a statement."""
        raise NotImplementedError("Subclasses should implement this method.")  # pragma: no cover

    def _check_to_python_param_type(
        self,
        expected_type: type[ResponseType],
        value: Any,
    ) -> None:
        """Raise TypeMismatchError unless value has the expected from-response-API type."""
        if isinstance(value, expected_type):
            return

        raise TypeMismatchError(
            converter_name=self.__class__.__name__,
            method_name="to_python_value",
            expected_type=expected_type.__name__,
            bad_value=value,
        )

    @classmethod
    def _check_to_statement_string_param_type(
        cls,
        expected_type: type,
        value: Any,
    ) -> None:
        """Raise TypeMismatchError unless value has the expected Python type."""
        if isinstance(value, expected_type):
            return

        raise TypeMismatchError(
            converter_name=cls.__name__,
            method_name="to_statement_string",
            expected_type=expected_type.__name__,
            bad_value=value,
        )
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def get_api_type_converter(
    connection: Connection, column_type: ColumnTypeDefinition
) -> TypeConverter:
    """Instantiate the TypeConverter registered for a from-Statement-JSON type description.

    Raises NotImplementedError when no converter is registered for the Flink type name.
    """
    flink_type_name = column_type.type_name

    # Look up the converter class registered under this Flink type name.
    converter_cls = _flink_type_name_to_converter_map.get(flink_type_name)
    if converter_cls:
        return converter_cls(connection, column_type)

    # No mapping yet -- a new converter needs to be written and registered.
    raise NotImplementedError(f"TypeConverter for {column_type.type_name} is not implemented.")
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
class StringConverter(TypeConverter[str, str]):
    """Handles Flink types for CHAR, VARCHAR, STRING"""

    PRIMARY_FLINK_TYPE_NAME = "STRING"

    def to_python_value(self, response_value: str | None) -> str | None:
        """Pass a string (or None) from the response straight through.

        Raises TypeMismatchError for any other kind of value.
        """
        if response_value is not None:
            self._check_to_python_param_type(str, response_value)

        return response_value

    @classmethod
    def to_statement_string(cls, python_value: str) -> str:
        """Render a Python string as a Flink SQL string literal for statement interpolation.

        Quoting rules applied here:

        * Flink delimits string literals with single quotes only, and an embedded
          single quote is escaped by doubling it.
        * Backslashes are ordinary characters in Flink SQL string literals; there
          is no backslash escaping to undo or apply.
        * Backticks delimit identifiers, and only when they are the outermost
          delimiters, so they need no escaping inside a string literal.
        """
        cls._check_to_statement_string_param_type(str, python_value)

        # Collapse any str subclass to a plain str so that overridden string
        # methods cannot interfere with the escaping below.
        plain = str(python_value)

        # Double embedded single quotes, then wrap the whole thing in single quotes.
        return "'" + plain.replace("'", "''") + "'"
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
class VarBinaryConverter(TypeConverter[bytes, str]):
    """Handles Flink type VARBINARY"""

    PRIMARY_FLINK_TYPE_NAME = "VARBINARY"

    def to_python_value(self, response_value: str | None) -> bytes | None:
        r"""Decode a hex-pair encoded string from the response into bytes.

        None is passed through for NULL values. The expected encoding is
        ``x'<hex pairs>'``.

        Examples: "x'7f0203'" <-> b"\x7f\x02\x03"

        Raises:
            TypeMismatchError: if the value is neither None nor a str.
            ValueError: if the value is not wrapped in ``x'`` ... ``'`` or the
                payload is not valid hexadecimal.
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        # The length check matters: the two-character string "x'" satisfies both
        # the prefix and the suffix test with the *same* quote character, and
        # would otherwise be silently decoded as empty bytes.
        well_formed = (
            len(response_value) >= 3  # noqa: PLR2004  # x + opening + closing quote
            and response_value.startswith("x'")
            and response_value.endswith("'")
        )
        if not well_formed:
            raise ValueError(
                f"Expected hex-pair encoded string starting with x' and ending with ' "
                f"for VarBinaryConverter but got {response_value}"
            )

        hex_string = response_value[2:-1]  # Strip off the x' and trailing '
        try:
            return bytes.fromhex(hex_string)
        except ValueError as e:
            raise ValueError(f"Invalid hex string for VarBinaryConverter: {hex_string}") from e

    @classmethod
    def to_statement_string(cls, python_value: bytes) -> str:
        r"""Render Python bytes as a hex literal for statement interpolation.

        Examples: b"\x7f\x02\x03" -> "x'7f0203'"

        Raises:
            TypeMismatchError: if the value is not a bytes instance.
        """
        cls._check_to_statement_string_param_type(bytes, python_value)

        hex_string = python_value.hex()
        return f"x'{hex_string}'"
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
class IntegerConverter(TypeConverter[int, str]):
    """Handles Flink types for TINYINT, SMALLINT, INTEGER, BIGINT to/from Python int"""

    PRIMARY_FLINK_TYPE_NAME = "INTEGER"

    def to_python_value(self, response_value: str | None) -> int | None:
        """Parse a string-encoded integer from the response; None passes through.

        Raises TypeMismatchError for a non-string value and ValueError when the
        string cannot be parsed by int().
        """
        if response_value is not None:
            self._check_to_python_param_type(str, response_value)
            return int(response_value)

        return None

    @classmethod
    def to_statement_string(cls, python_value: int) -> str:
        """Render a Python integer for statement interpolation: bare digits, no quotes."""
        cls._check_to_statement_string_param_type(int, python_value)

        # Normalize through int() first so that an int subclass with a custom
        # string form cannot leak anything but plain digits into the statement.
        as_plain_int = int(python_value)
        return str(as_plain_int)
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
class DecimalConverter(TypeConverter[Decimal, str]):
    """Handle fixed precision DECIMAL types, mapping to/from Python's decimal.Decimal"""

    PRIMARY_FLINK_TYPE_NAME = "DECIMAL"

    def to_python_value(self, response_value: str | None) -> Decimal | None:
        """Parse a string-encoded decimal from the response into a Decimal.

        None is passed through for NULL values.

        Raises:
            TypeMismatchError: if the value is neither None nor a str.
            decimal.InvalidOperation: if the string is not a valid decimal
                (raised by the Decimal constructor).
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        return Decimal(response_value)

    @classmethod
    def to_statement_string(cls, python_value: Decimal) -> str:
        """Render a Python Decimal as a typed literal for statement interpolation.

        The literal is wrapped in an explicit ``cast(... as decimal(p,s))``:

        * without the cast Flink would read the literal as a DOUBLE;
        * without precision and scale Flink would truncate to an integer.

        Precision and scale are derived from the value itself so that it always
        fits: scale is the number of fractional digits (never negative) and
        precision is at least the scale, as DECIMAL(p, s) requires 0 <= s <= p.
        Examples: 12.34 -> decimal(4,2); 0.001 -> decimal(3,3); 1E+2 -> decimal(3,0).

        Raises:
            TypeMismatchError: if the value is not a Decimal instance.
            ValueError: if the value is NaN or infinite, which have no DECIMAL
                representation.
        """

        cls._check_to_statement_string_param_type(Decimal, python_value)

        _sign, digits, exponent = python_value.as_tuple()

        # Non-finite values report a letter ('n', 'N' or 'F') instead of an
        # integer exponent; reject them rather than failing on arithmetic below.
        if not isinstance(exponent, int):
            raise ValueError("Cannot convert NaN or Infinity to a Flink SQL decimal literal")

        # A positive exponent (e.g. 1E+2) means trailing integer zeros, not a
        # negative scale; a negative exponent is the count of fractional digits.
        scale = max(0, -exponent)
        integer_padding = max(0, exponent)

        # Values below one (e.g. 0.001) have fewer significant digits than
        # fractional digits, so precision must be widened to at least the scale.
        precision = max(len(digits) + integer_padding, scale)

        # Fixed-point notation keeps scientific forms such as '1E+2' out of the
        # statement; for ordinary values this is identical to str(value).
        literal = format(python_value, "f")

        return f"cast('{literal}' as decimal({precision},{scale}))"
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
class FloatConverter(TypeConverter[float, str]):
    """Handles Flink types for FLOAT, DOUBLE to/from Python float"""

    PRIMARY_FLINK_TYPE_NAME = "DOUBLE"

    # Spellings that receive dedicated handling when they arrive from Flink.
    _transcendental_spellings = {
        "NaN": float("nan"),
        "Infinity": float("inf"),
        "-Infinity": float("-inf"),
    }

    def to_python_value(self, response_value: str | None) -> float | None:
        """Parse a string-encoded float from the response; None passes through.

        Raises TypeMismatchError for a non-string value and ValueError when the
        string cannot be parsed by float().
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        # The Flink/Java spellings of NaN and the infinities are looked up
        # explicitly; everything else goes through the regular float parser.
        special_value = self._transcendental_spellings.get(response_value, None)
        if special_value:
            return special_value

        return float(response_value)

    @classmethod
    def to_statement_string(cls, python_value: float) -> str:
        """Render a Python float as a bare numeric literal for statement interpolation.

        The literal is meant to be read by Flink as a DOUBLE: preferring the
        higher-precision type avoids losing precision in a FLOAT representation
        should the target column turn out to be DOUBLE.

        Raises TypeMismatchError for a non-float value and ValueError for NaN or
        an infinity.
        """
        cls._check_to_statement_string_param_type(float, python_value)

        # IEEE 754 permits NaN and the infinities, but Flink SQL's
        # convert-from-string does not (the statement crashes at the time of
        # writing, although Flink does cope with such values when they come
        # from, say, Avro-encoded Kafka data). Refuse them up front.
        if not (isnan(python_value) or isinf(python_value)):
            # Interpolated as a literal number in the statement, no quotes.
            return str(python_value)

        raise ValueError("Cannot convert NaN or Infinity to a Flink SQL float/double literal")
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
class BooleanConverter(TypeConverter[bool, str]):
    """Handles Flink type BOOLEAN to/from Python bool"""

    PRIMARY_FLINK_TYPE_NAME = "BOOLEAN"

    def to_python_value(self, response_value: str | None) -> bool | None:
        """Map the response string to a bool; None passes through.

        Any casing of 'true' yields True; every other string yields False.
        Raises TypeMismatchError for a non-string value.
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        lowered = response_value.lower()
        return lowered == "true"

    @classmethod
    def to_statement_string(cls, python_value: bool) -> str:
        """Render a Python bool as the keyword TRUE or FALSE for statement interpolation."""
        cls._check_to_statement_string_param_type(bool, python_value)

        if python_value:
            return "TRUE"
        return "FALSE"
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
class SqlNone:
    """Marker for a statement parameter that stands for a NULL of a specific type.

    As of time of writing, Flink SQL does not support bare NULL literals
    in statements, so a NULL value has to be cast to a concrete type.
    """

    # Static members for NULLs of common types, initialized at end of module.
    INTEGER: SqlNone
    VARCHAR: SqlNone
    STRING: SqlNone
    BOOLEAN: SqlNone
    DECIMAL: SqlNone
    FLOAT: SqlNone
    DATE: SqlNone
    TIME: SqlNone
    TIMESTAMP: SqlNone
    VARBINARY: SqlNone
    YEAR_MONTH_INTERVAL: SqlNone
    DAY_SECOND_INTERVAL: SqlNone

    _known_types_regex: re.Pattern | None = None
    """Compiled regex pattern for known Flink type names, for validation."""
    # (Built lazily on first use from the keys of _flink_type_name_to_converter_map.)

    _parameterized_type_regex = re.compile(r"^(?:ARRAY|MAP|MULTISET|ROW)\b", re.IGNORECASE)
    """Compiled regex pattern for parameterized Flink type names."""

    def __init__(self, python_or_flink_type: str | type):
        """Create a typed NULL from either a Flink type name or a Python type.

        A string is taken as a Flink type name and must match a registered type
        name (case-insensitively) or start with ARRAY, MAP, MULTISET or ROW.
        Anything else is treated as a Python type and mapped to the primary
        Flink type name of its registered converter.

        Raises InterfaceError when the name or type is not recognized.
        """
        if not isinstance(python_or_flink_type, str):
            # A Python type was given: map it to a Flink SQL type name.
            converter_cls = _python_type_to_type_converter.get(python_or_flink_type)
            if not converter_cls:
                raise InterfaceError(
                    f"Cannot determine Flink SQL type name for Python type {python_or_flink_type}"
                )

            self._flink_type_name = converter_cls.PRIMARY_FLINK_TYPE_NAME
            return

        # A Flink type name was given directly: validate it with the
        # case-insensitive patterns.
        known_types = SqlNone._known_types_regex
        if known_types is None:
            # First use: compile the pattern from the registered converter keys
            # and cache it on the class.
            alternatives = "|".join(re.escape(t) for t in _flink_type_name_to_converter_map)
            known_types = re.compile(r"^(?:" + alternatives + r")$", re.IGNORECASE)
            SqlNone._known_types_regex = known_types

        recognized = known_types.match(
            python_or_flink_type
        ) or SqlNone._parameterized_type_regex.match(python_or_flink_type)
        if not recognized:
            raise InterfaceError(f"Unknown Flink type name {python_or_flink_type}")

        # Either a registered name or a parameterized type; use it verbatim.
        self._flink_type_name = python_or_flink_type

    def __str__(self) -> str:
        """Return the SQL text that casts NULL to this instance's Flink type."""
        return f"cast (null as {self._flink_type_name})"
|
|
548
|
+
|
|
549
|
+
|
|
550
|
+
class NullResultConverter(TypeConverter[NoneType, NoneType]):
    """Handles Flink NULL values to Python None.

    One-way only: converts results -> Python None. Typed NULL parameters for
    statements are expressed with SqlNone instead.
    """

    PRIMARY_FLINK_TYPE_NAME = "NULL"

    def to_python_value(self, response_value: NoneType) -> None:
        """Expect None from the response value and return None.

        Raises TypeMismatchError if the response value is anything but None.
        """
        self._check_to_python_param_type(NoneType, response_value)

        return None  # noqa: PLR1711  # explicit return for clarity.

    @classmethod
    def to_statement_string(cls, python_value: NoneType) -> str:
        """Always raises InterfaceError: a bare None carries no SQL type.

        Flink needs NULL parameters to be cast to a concrete type, which a plain
        Python None cannot express.
        """
        # The message names this class and the SqlNone marker so that the hint
        # points at identifiers that actually exist in this module.
        raise InterfaceError(
            "NullResultConverter cannot convert Python None to statement string directly. "
            "Use SqlNone to specify the desired SQL type for NULL parameters."
        )
|
|
567
|
+
|
|
568
|
+
|
|
569
|
+
class SqlNoneConverter(TypeConverter[SqlNone, NoneType]):
    """Handles conversion of SqlNone to SQL NULL of specified type."""

    # A value is required here, but this converter never translates a SQL NULL
    # into a Python SqlNone. It works in one direction only -- the mirror image
    # of NullResultConverter.
    PRIMARY_FLINK_TYPE_NAME = ""

    # Because it never takes part in Flink result -> Python conversion, this
    # class is not registered in _flink_type_name_to_converter_map.

    def to_python_value(self, response_value: NoneType) -> None:
        """Unsupported direction: SqlNone exists for parameter conversion only."""
        message = (
            "SqlNoneConverter cannot convert from response values to Python. "
            "It is only for converting SqlNone parameters to SQL NULL strings."
        )
        raise InterfaceError(message)

    @classmethod
    def to_statement_string(cls, python_value: SqlNone) -> str:
        """Render an SqlNone instance as its typed-NULL text for statement interpolation."""
        cls._check_to_statement_string_param_type(SqlNone, python_value)

        # str(SqlNone) already carries the cast syntax for its embedded type.
        typed_null = str(python_value)
        return typed_null
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
class DateConverter(TypeConverter[date, str]):
    """Handles Flink DATE type to Python datetime.date"""

    PRIMARY_FLINK_TYPE_NAME = "DATE"

    def to_python_value(self, response_value: str | None) -> date | None:
        """Parse a string-encoded date ('YYYY-MM-DD') from the response; None passes through.

        Raises TypeMismatchError for a non-string value and ValueError when the
        string cannot be parsed.
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        try:
            # Parsed as an ISO datetime, of which only the date part is kept.
            parsed = datetime.fromisoformat(response_value)
            return parsed.date()
        except Exception as e:
            raise ValueError(f"Invalid date string for DateConverter: {response_value}") from e

    @classmethod
    def to_statement_string(cls, python_value: date) -> str:
        """Render a Python datetime.date as a DATE literal for statement interpolation.

        The result is the keyword DATE followed by the quoted ISO form of the value.
        """

        cls._check_to_statement_string_param_type(date, python_value)

        # The 'DATE' keyword prefix is required by the use cases this serves.
        iso_text = python_value.isoformat()
        return f"DATE '{iso_text}'"
|
|
624
|
+
|
|
625
|
+
|
|
626
|
+
class TimeConverter(TypeConverter[time, str]):
    """Handles Flink TIME type to Python datetime.time"""

    PRIMARY_FLINK_TYPE_NAME = "TIME"

    def to_python_value(self, response_value: str | None) -> time | None:
        """Parse a string-encoded time ('HH:MM:SS(.MMMMMM)') from the response.

        None passes through. Raises TypeMismatchError for a non-string value and
        ValueError when the string cannot be parsed.
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        try:
            parsed = time.fromisoformat(response_value)
        except Exception as e:
            raise ValueError(f"Invalid time string for TimeConverter: {response_value}") from e
        return parsed

    @classmethod
    def to_statement_string(cls, python_value: time) -> str:
        """Render a Python datetime.time as a TIME literal for statement interpolation.

        The result is the keyword TIME followed by the quoted ISO form of the
        value, always with microsecond resolution.
        """

        cls._check_to_statement_string_param_type(time, python_value)

        iso_text = python_value.isoformat(timespec="microseconds")
        return f"TIME '{iso_text}'"
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
class TimestampConverter(TypeConverter[datetime, str]):
    """Converts Flink TIMESTAMP and TIMESTAMP_LTZ values to/from Python
    datetime.datetime (with or without tzinfo).

    Python to Flink: a datetime carrying tzinfo is shifted to the equivalent UTC time and
    emitted as a TIMESTAMP_LTZ cast, which should line up with a submitted statement's
    default 'sql.local-time-zone' property of UTC. A tz-naive datetime is emitted unchanged
    as a TIMESTAMP cast.

    Flink to Python: TIMESTAMP columns yield tz-naive datetimes, TIMESTAMP_LTZ columns yield
    tz-aware datetimes with tzinfo=UTC.

    Consequently, round-tripping a tz-aware datetime through TIMESTAMP_LTZ preserves the
    instant in time but replaces any non-UTC tzinfo with UTC.

    Supply tz-naive datetimes for data intended for TIMESTAMP columns, and tz-aware
    datetimes for data intended for TIMESTAMP_LTZ columns.
    """

    PRIMARY_FLINK_TYPE_NAME = "TIMESTAMP"

    def __init__(self, connection: Connection, column_type: ColumnTypeDefinition):
        # Only the canonical spellings found in statement schema JSON are accepted, so
        # that aliases (as seen in the test suite) cannot cause confusion.
        if (
            column_type.type_name != "TIMESTAMP_WITHOUT_TIME_ZONE"
            and column_type.type_name != "TIMESTAMP_WITH_LOCAL_TIME_ZONE"
        ):
            raise ValueError(
                f"TimestampConverter can only be used with TIMESTAMP_WITHOUT_TIME_ZONE or"
                f" TIMESTAMP_WITH_LOCAL_TIME_ZONE types, got {column_type.type_name}"
            )
        super().__init__(connection, column_type)

    @classmethod
    def to_statement_string(cls, python_value: datetime) -> str:
        """Render a datetime.datetime as a SQL cast expression for statement interpolation.

        The target Flink type is chosen from the presence of tzinfo: tz-naive values
        become ``timestamp`` casts, tz-aware values become ``timestamp_ltz`` casts of
        their UTC wall-clock time.
        """

        cls._check_to_statement_string_param_type(datetime, python_value)

        if python_value.tzinfo is None:
            flink_type = "timestamp"
        else:
            # The string form carries no offset, so express the instant as UTC
            # wall-clock time and name the type explicitly in the cast.
            flink_type = "timestamp_ltz"
            python_value = python_value.astimezone(tz=timezone.utc).replace(tzinfo=None)

        iso_str = python_value.isoformat(sep=" ", timespec="microseconds")
        return f"cast('{iso_str}' as {flink_type})"

    def to_python_value(self, response_value: str | None) -> datetime | None:
        """Decode a 'YYYY-MM-DD HH:MM:SS(.MMMMMM)' response string into a datetime.

        None passes through as None. For TIMESTAMP_LTZ columns the result carries
        tzinfo=UTC; otherwise it is tz-naive.

        Raises ValueError when the string cannot be parsed or unexpectedly carries
        timezone information.
        """

        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        # Flink is expected to send timezone-free strings only; anything else means the
        # assumptions made here need to be revisited.
        try:
            parsed = datetime.fromisoformat(response_value)
        except Exception as e:
            raise ValueError(
                f"Invalid timestamp string for TimestampConverter: {response_value}"
            ) from e

        if parsed.tzinfo is not None:
            raise ValueError(
                f"Expected timezone-naive timestamp string from Flink but got {response_value}"
            )

        # TIMESTAMP_LTZ values are interpreted as UTC and tagged accordingly.
        is_ltz = self._column_type.type_name == "TIMESTAMP_WITH_LOCAL_TIME_ZONE"
        return parsed.replace(tzinfo=timezone.utc) if is_ltz else parsed
|
|
740
|
+
|
|
741
|
+
|
|
742
|
+
@dataclass
class YearMonthInterval:
    """A Flink YEAR TO MONTH interval with separate year and month components.

    Negative intervals have negative years and/or months, and the two components never
    carry opposite signs (this avoids ambiguity and allows negative months-only
    intervals). The smallest-magnitude negative interval is therefore 0 years and
    -1 month. Use the `is_negative` property to check the sign.

    (This differs from Python's timedelta, which represents a negative interval of less
    than one day with negative days and positive seconds/microseconds that sum to the
    right negative value.)

    The string representation is '<sign>Y-MM': a leading '+' or '-', the absolute value
    of years, a hyphen, and the absolute value of months zero-padded to two digits.

    Raises:
        TypeError: if years or months is not an int.
        ValueError: if the components have opposite signs, months is outside -11..11,
            or years is outside -9999..9999.
    """

    years: int
    months: int

    def __post_init__(self):
        if not isinstance(self.years, int) or not isinstance(self.months, int):
            raise TypeError("YearMonthInterval years and months must be integers.")

        if (self.years < 0 and self.months > 0) or (self.years > 0 and self.months < 0):
            raise ValueError("YearMonthInterval years and months must have the same sign.")

        if abs(self.months) >= 12:
            raise ValueError("YearMonthInterval months must be in the range -11 to 11.")

        if abs(self.years) > 9999:
            raise ValueError("YearMonthInterval years must be in the range -9999 to 9999")

    @property
    def is_negative(self) -> bool:
        """Return True if the interval is negative, False otherwise."""
        return self.years < 0 or self.months < 0

    def _key(self) -> tuple[int, int]:
        """Return the (years, months) pair used for ordering, equality and hashing.

        Because both components share a sign and months stays within -11..11, plain
        tuple ordering matches chronological ordering of the intervals.
        """
        return (self.years, self.months)

    def __str__(self) -> str:
        sign = "-" if self.is_negative else "+"
        return f"{sign}{abs(self.years)}-{abs(self.months):02d}"

    # Rich comparison methods for vague parity with timedelta. Each one defers to the
    # other operand (NotImplemented) unless it is also a YearMonthInterval.
    def __lt__(self, other: Any) -> bool:
        if not isinstance(other, YearMonthInterval):
            return NotImplemented
        return self._key() < other._key()

    def __le__(self, other: Any) -> bool:
        if not isinstance(other, YearMonthInterval):
            return NotImplemented
        return self._key() <= other._key()

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, YearMonthInterval):
            return NotImplemented
        return self._key() == other._key()

    def __gt__(self, other: Any) -> bool:
        if not isinstance(other, YearMonthInterval):
            return NotImplemented
        return self._key() > other._key()

    def __ge__(self, other: Any) -> bool:
        if not isinstance(other, YearMonthInterval):
            return NotImplemented
        return self._key() >= other._key()

    def __ne__(self, other: Any) -> bool:
        if not isinstance(other, YearMonthInterval):
            return NotImplemented
        return self._key() != other._key()

    def __hash__(self) -> int:
        """Hash on the same key as __eq__, which is overridden above."""
        return hash(self._key())
|
|
821
|
+
|
|
822
|
+
|
|
823
|
+
class YearMonthIntervalConverter(TypeConverter[YearMonthInterval, str]):
    """Handles Flink YEAR TO MONTH variant INTERVAL types as strings.

    INTERVAL YEAR TO MONTH is mapped to the Python YearMonthInterval dataclass. Its string
    representation is of the form '<sign>Y-MM' (for example '+1-06'), and the Flink schema
    type will be INTERVAL_YEAR_MONTH.
    """

    PRIMARY_FLINK_TYPE_NAME = "INTERVAL_YEAR_MONTH"

    # Mandatory sign, then years and months as plain digit runs. Matching the whole
    # string up front stops a stray leading character from being read as '+' and stops
    # int() leniency (underscores, surrounding whitespace) from accepting bad input.
    _YEAR_MONTH_RE = re.compile(r"(?P<sign>[+-])(?P<years>\d+)-(?P<months>\d+)")

    def __init__(self, connection: Connection, column_type: ColumnTypeDefinition):
        if column_type.type_name != "INTERVAL_YEAR_MONTH":
            raise ValueError(
                f"YearMonthIntervalConverter can only be used with INTERVAL_YEAR_MONTH types, "
                f"got {column_type.type_name}"
            )
        super().__init__(connection, column_type)

    def to_python_value(self, response_value: str | None) -> YearMonthInterval | None:
        """Decode a string-encoded interval from the response into a YearMonthInterval.

        None passes through as None. Example input: '+1-06' for 1 year, 6 months.

        Raises ValueError if the string does not have the '<sign>Y-M' shape or its
        components fall outside the ranges YearMonthInterval accepts.
        """

        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        try:
            m = self._YEAR_MONTH_RE.fullmatch(response_value)
            if m is None:
                raise ValueError("expected a leading '+' or '-' followed by 'Y-M'")

            # Apply the sign to both components; YearMonthInterval requires that they
            # never carry opposite signs.
            factor = -1 if m.group("sign") == "-" else 1
            return YearMonthInterval(
                years=factor * int(m.group("years")),
                months=factor * int(m.group("months")),
            )
        except ValueError as e:
            raise ValueError(
                f"Invalid interval string for YearMonthIntervalConverter: {response_value}"
            ) from e

    @classmethod
    def to_statement_string(cls, python_value: YearMonthInterval) -> str:
        """Convert a Python YearMonthInterval value representing an interval to its
        for-statement-string-interpolation representation."""
        cls._check_to_statement_string_param_type(YearMonthInterval, python_value)

        interval_str = str(python_value)
        return f"INTERVAL '{interval_str}' YEAR TO MONTH"
|
|
873
|
+
|
|
874
|
+
|
|
875
|
+
class DaysIntervalConverter(TypeConverter[timedelta, str]):
    """Handles Flink DAYS TO SECOND variant INTERVAL types as strings.

    INTERVAL DAY TO SECOND is mapped to Python timedelta. Its string representation
    is of the form '<sign>D HH:MM:SS.MMMMMM', and the Flink schema type will be
    INTERVAL_DAY_TIME.

    Care is needed with negative intervals carrying fractional seconds, since Python
    normalizes negative timedeltas in a surprising way, expressing them with negative
    days and positive seconds/microseconds.
    """

    PRIMARY_FLINK_TYPE_NAME = "INTERVAL_DAY_TIME"

    _HOURS_TO_SECONDS_RE = re.compile(
        r"^(?P<sign>[+-])(?P<days>\d+)\s(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})(?:\.(?P<micro>\d{1,6}))?$"
    )

    def to_python_value(self, response_value: str | None) -> timedelta | None:
        """Decode a string-encoded interval from the response into a timedelta.

        None passes through as None. Examples:
          * '+1 12:30:45.123456' (positive days through to microseconds),
          * '-0 00:15:00' (negative 15 minutes, no fractional seconds),
          * '+0 04:00:00.000' (0 days, 4 hours).

        Raises ValueError if the string does not match the expected format or the
        value does not fit in a timedelta.
        """

        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        m = self._HOURS_TO_SECONDS_RE.match(response_value)
        if not m:
            raise ValueError(f"Invalid interval string for IntervalConverter: {response_value}")

        # Right-pad the fraction so that e.g. '.5' is read as 500000 microseconds.
        micro_group = m.group("micro")
        microseconds = int(micro_group.ljust(6, "0")) if micro_group else 0

        try:
            # Build the magnitude as a positive timedelta first, then negate if needed.
            td = timedelta(
                days=int(m.group("days")),
                hours=int(m.group("hours")),
                minutes=int(m.group("minutes")),
                seconds=int(m.group("seconds")),
                microseconds=microseconds,
            )
            if m.group("sign") == "-":
                td = -td
        except OverflowError as e:
            # The day count is unbounded in the pattern and may exceed timedelta's range.
            raise ValueError(
                f"Invalid interval string for IntervalConverter: {response_value}"
            ) from e

        return td

    ZERO_TIMEDELTA = timedelta(0)

    @classmethod
    def to_statement_string(cls, python_value: timedelta) -> str:
        """Convert a Python timedelta value representing an interval to its
        for-statement-string-interpolation representation.

        The result has the form "INTERVAL '<sign>D HH:MM:SS[.MMMMMM]' DAY TO SECOND[(6)]";
        the fractional part and the '(6)' precision appear only when the value has
        non-zero microseconds.
        """
        cls._check_to_statement_string_param_type(timedelta, python_value)

        # Work on the magnitude and remember the sign. This sidesteps Python's
        # normalization of negative timedeltas (negative days plus positive
        # seconds/microseconds).
        if python_value < cls.ZERO_TIMEDELTA:
            python_value = -python_value
            sign = "-"
        else:
            sign = "+"

        # Read the integer attributes directly. A non-negative timedelta is normalized
        # to days >= 0, 0 <= seconds < 86400, 0 <= microseconds < 1000000. Going through
        # total_seconds() would round-trip via a float, which loses microsecond accuracy
        # on large intervals and can bump the seconds field by one.
        days = python_value.days
        hours, remainder = divmod(python_value.seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        microseconds = python_value.microseconds

        # NOTE(review): no leading DAY precision is emitted; confirm that Flink accepts
        # day counts wider than its default DAY precision in this literal form.
        interval_str = f"{sign}{days} {hours:02}:{minutes:02}:{seconds:02}"
        if microseconds > 0:
            interval_str += f".{microseconds:06}"
            precision = "(6)"
        else:
            precision = ""

        return f"INTERVAL '{interval_str}' DAY TO SECOND{precision}"
|
|
970
|
+
|
|
971
|
+
|
|
972
|
+
class ArrayConverter(TypeConverter[list, list]):
    """Converts Flink ARRAY values to/from Python lists.

    Caveats:
    * Nested lists / arrays work, but an empty list cannot be rendered (Flink has no
      empty array literal at this time).
    * None entries become SQL NULLs of the element type; a list consisting solely of
      Nones is rejected because no element type can be inferred from it.
    """

    PRIMARY_FLINK_TYPE_NAME = "ARRAY"

    _element_converter: TypeConverter
    """Converter instance applied to each array element."""

    def __init__(self, connection: Connection, column_type: ColumnTypeDefinition):
        if column_type.type_name != "ARRAY":
            raise InterfaceError(
                f"ArrayConverter can only be used with ARRAY types, got {column_type.type_name}"
            )

        # The element converter is selected from the element type declared on the column.
        element_type_def = column_type.element_type
        if not element_type_def:
            raise InterfaceError(
                "ArrayConverter cannot determine element type from column type definition."
            )

        converter_factory = _flink_type_name_to_converter_map.get(element_type_def.type_name)
        if not converter_factory:
            raise TypeError(
                f"Conversion for array element of type {element_type_def.type_name} is not"
                " implemented."
            )

        self._element_converter = converter_factory(connection, element_type_def)

        super().__init__(connection, column_type)

    def to_python_value(self, response_value: list | None) -> list | None:
        """Decode a response list element by element; None passes through as None."""
        if response_value is None:
            return None

        self._check_to_python_param_type(list, response_value)

        decode = self._element_converter.to_python_value
        return [decode(raw_element) for raw_element in response_value]

    @classmethod
    def to_statement_string(cls, python_value: list) -> str:
        """Render a Python list as an ``ARRAY[...]`` literal for statement interpolation.

        Raises ValueError for an empty list.
        """
        cls._check_to_statement_string_param_type(list, python_value)

        if not python_value:
            # Flink offers no empty-array literal, and with no elements there would be
            # nothing to infer an ARRAY<element_type> spelling from anyway.
            raise ValueError("Cannot convert empty list to Flink ARRAY literal.")

        # One converter class is chosen for the whole list; None entries are rendered
        # as a NULL of that converter's primary Flink type.
        item_converter_cls = determine_element_converter_cls(python_value)
        null_literal = str(SqlNone(item_converter_cls.PRIMARY_FLINK_TYPE_NAME))

        # Converting an individual element may raise if it has the wrong type.
        rendered = [
            null_literal if item is None else item_converter_cls.to_statement_string(item)
            for item in python_value
        ]

        return f"ARRAY[{', '.join(rendered)}]"
|
|
1055
|
+
|
|
1056
|
+
|
|
1057
|
+
class MapConverter(TypeConverter[dict, list]):
    """Converts Flink MAP values to/from Python dicts.

    Caveats:
    * An empty dict cannot be rendered, since Flink has no empty map literal at this time.
    * Decoded Flink map keys must be hashable in Python.
    * None keys and None values are rendered as SQL NULLs of the appropriate type, but a
      dict whose keys are all None, or whose values are all None, is rejected because the
      key/value type cannot be inferred.
    * Keys must be of one uniform type (or None), and likewise values, because Flink MAP
      types require uniform key and value types.
    """

    PRIMARY_FLINK_TYPE_NAME = "MAP"

    key_converter: TypeConverter
    """Converter instance for the map's key type."""
    value_converter: TypeConverter
    """Converter instance for the map's value type."""

    def __init__(self, connection: Connection, column_type: ColumnTypeDefinition):
        if column_type.type_name != "MAP":
            raise InterfaceError(
                f"MapConverter can only be used with MAP types, got {column_type.type_name}"
            )

        # Key and value converters come from the key/value types declared on the column.
        key_type_def = column_type.key_type
        value_type_def = column_type.value_type
        if not key_type_def:
            raise InterfaceError(
                "MapConverter cannot determine key type from column type definition."
            )
        if not value_type_def:
            raise InterfaceError(
                "MapConverter cannot determine value type from column type definition."
            )

        key_factory = _flink_type_name_to_converter_map.get(key_type_def.type_name)
        if not key_factory:
            raise TypeError(
                f"Conversion for map key of type {key_type_def.type_name} is not implemented."
            )
        self.key_converter = key_factory(connection, key_type_def)

        value_factory = _flink_type_name_to_converter_map.get(value_type_def.type_name)
        if not value_factory:
            raise TypeError(
                f"Conversion for map value of type {value_type_def.type_name} is not implemented."
            )
        self.value_converter = value_factory(connection, value_type_def)

        super().__init__(connection, column_type)

    def to_python_value(self, response_value: list | None) -> dict | None:
        """Decode a response list of [key, value] pair lists into a dict.

        None passes through as None. Raises ValueError if any entry is not a list of
        exactly two items.
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(list, response_value)

        # The response has the shape [[enc-key1, enc-value1], [enc-key2, enc-value2], ...],
        # each side still in its from-response encoding; decode each side with the
        # converter for its type.
        decoded: dict = {}
        for pair in response_value:
            well_formed = isinstance(pair, list) and len(pair) == 2
            if not well_formed:
                raise ValueError(
                    f"Expected key-value pair list of length 2 for MapConverter but got: {pair}"
                )

            raw_key, raw_value = pair[0], pair[1]
            py_key = self.key_converter.to_python_value(raw_key)
            decoded[py_key] = self.value_converter.to_python_value(raw_value)

        return decoded

    @classmethod
    def to_statement_string(cls, python_value: dict) -> str:
        """Render a Python dict as a ``MAP[...]`` literal for statement interpolation.

        Example: MAP['key1', 12, 'key2', 22] for string keys and integer values.
        Raises ValueError for an empty dict.
        """

        cls._check_to_statement_string_param_type(dict, python_value)

        if not python_value:
            # Flink offers no empty-map literal.
            raise ValueError("Cannot convert empty dict to Flink MAP literal.")

        # One converter class for all keys and one for all values.
        key_cls = determine_element_converter_cls(python_value.keys())
        value_cls = determine_element_converter_cls(python_value.values())

        null_key = str(SqlNone(key_cls.PRIMARY_FLINK_TYPE_NAME))
        null_value = str(SqlNone(value_cls.PRIMARY_FLINK_TYPE_NAME))

        # Flatten to key1, value1, key2, value2, ... Converting an individual key or
        # value may raise if it has the wrong type.
        flattened: list[str] = []
        for k, v in python_value.items():
            flattened.append(null_key if k is None else key_cls.to_statement_string(k))
            flattened.append(null_value if v is None else value_cls.to_statement_string(v))

        return f"MAP[{', '.join(flattened)}]"
|
|
1182
|
+
|
|
1183
|
+
|
|
1184
|
+
class MultisetConverter(TypeConverter[Counter, list]):
    """Converts Flink MULTISET values to Python collections.Counter.

    A MULTISET behaves like a MAP from element to an integer count of how many times
    that element occurs, which corresponds to Python's collections.Counter.

    Only decoding is supported: rendering a Counter as a statement literal always
    raises, because Flink currently has no MULTISET literal syntax.
    """

    PRIMARY_FLINK_TYPE_NAME = "MULTISET"

    element_converter: TypeConverter
    """Converter instance for the multiset's element / key type."""

    int_converter: IntegerConverter
    """Converter instance used for the per-element counts."""

    def __init__(self, connection: Connection, column_type: ColumnTypeDefinition):
        if column_type.type_name != "MULTISET":
            raise InterfaceError(
                f"MultisetConverter can only be used with MULTISET types, got {column_type.type_name}"  # noqa: E501
            )

        # The element converter is selected from the element type declared on the column.
        element_type_def = column_type.element_type
        if not element_type_def:
            raise InterfaceError(
                "MultisetConverter cannot determine element type from column type definition."
            )

        converter_factory = _flink_type_name_to_converter_map.get(element_type_def.type_name)
        if not converter_factory:
            raise TypeError(
                f"Conversion for multiset element of type {element_type_def.type_name} is not implemented."  # noqa: E501
            )

        self.element_converter = converter_factory(connection, element_type_def)

        # Counts are always decoded as non-nullable INTEGERs.
        count_type = ColumnTypeDefinition(type="INTEGER", nullable=False)
        self.int_converter = IntegerConverter(connection, count_type)

        super().__init__(connection, column_type)

    def to_python_value(self, response_value: list | None) -> Counter | None:
        """Decode a response list of [element, count] pairs into a Counter.

        None passes through as None. Raises InterfaceError if an entry is not a
        two-item list, or if an element or count decodes to None.
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(list, response_value)

        counts: Counter = Counter()
        for pair in response_value:
            if not isinstance(pair, list):
                raise InterfaceError(
                    f"Expected to receive value+count list for MultisetConverter, but got {type(pair)} instead."  # noqa: E501
                )
            try:
                raw_element, raw_count = pair
            except Exception as e:
                raise InterfaceError(
                    f"Expected element + count pair list for MultisetConverter but got: {pair}"
                ) from e

            # NOTE(review): Flink's MULTISET documentation describes NULL as a permitted
            # element; a None element is rejected here -- confirm this is intended.
            member = self.element_converter.to_python_value(raw_element)
            if member is None:
                raise InterfaceError("Expected element for MultisetConverter but got None")

            occurrences = self.int_converter.to_python_value(raw_count)
            if occurrences is None:
                raise InterfaceError("Expected integer count for MultisetConverter but got None")

            counts[member] = occurrences

        return counts

    @classmethod
    def to_statement_string(cls, python_value: Counter) -> str:
        """Always raises InterfaceError: Flink has no literal MULTISET syntax at present."""
        raise InterfaceError("Flink does not currently support MULTISET literals.")
|
|
1268
|
+
|
|
1269
|
+
|
|
1270
|
+
class IsDataclass(Protocol):
    """Structural type for @dataclass instances (the standard library provides none).

    Anything exposing the ``__dataclass_fields__`` class attribute satisfies it.
    """

    __dataclass_fields__: ClassVar[dict[str, Any]]
|
|
1274
|
+
|
|
1275
|
+
|
|
1276
|
+
RowPythonTypes: TypeAlias = tuple | IsDataclass
"""Python representations accepted for Flink ROW column values: a tuple (which covers
collections.namedtuple and typing.NamedTuple) or a @dataclass instance."""
|
|
1279
|
+
|
|
1280
|
+
|
|
1281
|
+
class RowConverter(TypeConverter[RowPythonTypes, list]):
|
|
1282
|
+
"""Convert Flink ROW type to/from Python tuple, namedtuple, or @dataclass instances.
|
|
1283
|
+
|
|
1284
|
+
When converting from Flink ROW type, a namedtuple or @dataclass instance is returned,
|
|
1285
|
+
with field names corresponding to the ROW's field names. The class to use
|
|
1286
|
+
is cached globally based on the field names, so that multiple
|
|
1287
|
+
ROWs with the same field names share the same registered class (even across
|
|
1288
|
+
multiple RowConverter instances / separate queries or cursors).
|
|
1289
|
+
|
|
1290
|
+
The class to use for a given set of field names is obtained from the connection's
|
|
1291
|
+
row class registry, which will create a new collections.namedtuple class
|
|
1292
|
+
as needed.
|
|
1293
|
+
|
|
1294
|
+
When interpolating python tuples, namedtuples, or dataclasses into statements strings,
|
|
1295
|
+
the values are converted positionally field by field, and the resulting string is
|
|
1296
|
+
of the form "ROW(field1_value, field2_value, ...)".
|
|
1297
|
+
"""
|
|
1298
|
+
|
|
1299
|
+
PRIMARY_FLINK_TYPE_NAME = "ROW"
|
|
1300
|
+
|
|
1301
|
+
_field_converters: list[TypeConverter]
|
|
1302
|
+
"""List of TypeConverter instances for each field in the row, in order."""
|
|
1303
|
+
_field_names: list[str]
|
|
1304
|
+
"""List of field names in the row, in order."""
|
|
1305
|
+
_python_value_class: type[RowPythonTypes]
|
|
1306
|
+
"""The namedtuple or @dataclass class from the connection's row class registry
|
|
1307
|
+
corresponding to this row type's field names."""
|
|
1308
|
+
|
|
1309
|
+
def __init__(self, connection: Connection, column_type: ColumnTypeDefinition):
|
|
1310
|
+
if column_type.type_name != "ROW":
|
|
1311
|
+
raise InterfaceError(
|
|
1312
|
+
f"RowConverter can only be used with ROW types, got {column_type.type_name}"
|
|
1313
|
+
)
|
|
1314
|
+
|
|
1315
|
+
if not column_type.fields:
|
|
1316
|
+
raise InterfaceError("RowConverter requires column type definition with fields")
|
|
1317
|
+
|
|
1318
|
+
self._field_converters = []
|
|
1319
|
+
self._field_names = []
|
|
1320
|
+
|
|
1321
|
+
for field_def in column_type.fields:
|
|
1322
|
+
field_name = field_def.name
|
|
1323
|
+
self._field_names.append(field_name)
|
|
1324
|
+
|
|
1325
|
+
field_type_def = field_def.type
|
|
1326
|
+
if not field_type_def:
|
|
1327
|
+
raise InterfaceError(
|
|
1328
|
+
f"RowConverter cannot determine type for field '{field_name}'."
|
|
1329
|
+
)
|
|
1330
|
+
|
|
1331
|
+
field_converter_cls = _flink_type_name_to_converter_map.get(field_type_def.type_name)
|
|
1332
|
+
if not field_converter_cls:
|
|
1333
|
+
raise TypeError(
|
|
1334
|
+
f"Conversion for row field '{field_name}' of type "
|
|
1335
|
+
f"{field_type_def.type_name} is not implemented."
|
|
1336
|
+
)
|
|
1337
|
+
|
|
1338
|
+
field_converter = field_converter_cls(connection, field_type_def)
|
|
1339
|
+
self._field_converters.append(field_converter)
|
|
1340
|
+
|
|
1341
|
+
# Get or create the class for this row type's field names.
|
|
1342
|
+
self._python_value_class = connection._row_type_registry.get_row_class(self._field_names)
|
|
1343
|
+
|
|
1344
|
+
super().__init__(connection, column_type)
|
|
1345
|
+
|
|
1346
|
+
def to_python_value(self, response_value: list | None) -> RowPythonTypes | None:
|
|
1347
|
+
"""Expect list or None from the response value, return as registered class (or namedtuple)
|
|
1348
|
+
or raise InterfaceError."""
|
|
1349
|
+
if response_value is None:
|
|
1350
|
+
return None
|
|
1351
|
+
|
|
1352
|
+
self._check_to_python_param_type(list, response_value)
|
|
1353
|
+
|
|
1354
|
+
if len(response_value) != len(self._field_converters):
|
|
1355
|
+
raise InterfaceError(
|
|
1356
|
+
f"Expected {len(self._field_converters)} fields for RowConverter but got "
|
|
1357
|
+
f"{len(response_value)}"
|
|
1358
|
+
)
|
|
1359
|
+
|
|
1360
|
+
field_values = []
|
|
1361
|
+
for field_name, converter, field_value in zip(
|
|
1362
|
+
self._field_names, self._field_converters, response_value, strict=True
|
|
1363
|
+
):
|
|
1364
|
+
# Each converter may raise if field value is unexpected type, range, etc.
|
|
1365
|
+
try:
|
|
1366
|
+
converted_field_value = converter.to_python_value(field_value)
|
|
1367
|
+
except Exception as e:
|
|
1368
|
+
raise InterfaceError(
|
|
1369
|
+
f"Error converting field '{field_name}' value in RowConverter: {e}"
|
|
1370
|
+
) from e
|
|
1371
|
+
|
|
1372
|
+
field_values.append(converted_field_value)
|
|
1373
|
+
|
|
1374
|
+
# Return an instance of the registered class corresponding to the
|
|
1375
|
+
# ROW's field names with the converted field values.
|
|
1376
|
+
return self._python_value_class(*field_values)
|
|
1377
|
+
|
|
1378
|
+
@classmethod
def handles_python_value(cls, python_value: Any) -> bool:
    """Report whether ``python_value`` can be rendered as a ROW by this converter.

    Accepted values are tuples (which includes ``collections.namedtuple`` and
    ``typing.NamedTuple`` instances, since both subclass ``tuple``) and
    instances of ``@dataclass`` classes.

    Used by ``get_converter_for_python_value()`` to pick a converter class for
    values whose exact type is not registered directly.
    """
    if isinstance(python_value, tuple):
        return True

    # is_dataclass() is also true for the dataclass *type* itself; only
    # instances are acceptable here.
    return is_dataclass(python_value) and not isinstance(python_value, type)
|
|
1392
|
+
|
|
1393
|
+
@classmethod
def to_statement_string(cls, python_value: RowPythonTypes) -> str:
    """Render a row-like Python value as ``(ROW(v1, v2, ...))`` for interpolation
    into a statement string.

    Tuples, ``collections.namedtuple`` and ``typing.NamedTuple`` instances
    contribute their items positionally. ``@dataclass`` instances contribute
    their field values in declaration order.

    The outer parentheses are deliberate: the expression must be wrapped when
    it appears inside a larger expression (e.g. an INSERT ... VALUES clause),
    otherwise strange parsing errors will occur.

    Raises:
        TypeMismatchError: If ``python_value`` is neither a tuple nor a
            dataclass instance.
        InterfaceError: Possibly, from ``get_converter_for_python_value()``,
            when an individual field value is of an unsupported type.
    """
    positional_values: tuple

    if isinstance(python_value, tuple):
        # Plain tuple, namedtuple, or NamedTuple subclass: already positional.
        positional_values = python_value
    elif is_dataclass(python_value) and not isinstance(python_value, type):
        # Flatten the dataclass instance into its field values.
        positional_values = tuple(
            getattr(python_value, spec.name) for spec in fields(python_value)
        )
    else:
        raise TypeMismatchError(
            converter_name=cls.__name__,
            method_name="to_statement_string",
            expected_type="tuple, namedtuple, NamedTuple, or dataclass",
            bad_value=python_value,
        )

    # Each field is rendered by the converter class matching its own Python type.
    rendered_fields: list[str] = [
        get_converter_for_python_value(item).to_statement_string(item)
        for item in positional_values
    ]

    return f"(ROW({', '.join(rendered_fields)}))"
|
|
1432
|
+
|
|
1433
|
+
|
|
1434
|
+
# Lookup table from Flink type name to the TypeConverter class that handles it.
# Aliases that share a converter are grouped; dict.fromkeys() keeps the keys in
# the listed order, so overall insertion order matches the listing below.
_flink_type_name_to_converter_map: dict[str, type[TypeConverter]] = {
    # Null type
    "NULL": NullResultConverter,
    # Boolean type
    "BOOLEAN": BooleanConverter,
    # Integer types
    **dict.fromkeys(("TINYINT", "SMALLINT", "INTEGER", "BIGINT"), IntegerConverter),
    # Fixed precision types
    **dict.fromkeys(("DECIMAL", "DEC", "NUMERIC"), DecimalConverter),
    # Floating point types
    **dict.fromkeys(("FLOAT", "DOUBLE", "DOUBLE PRECISION"), FloatConverter),
    # Date type
    "DATE": DateConverter,
    # Time type
    **dict.fromkeys(("TIME", "TIME_WITHOUT_TIME_ZONE"), TimeConverter),
    # Timestamp types
    **dict.fromkeys(
        (
            "TIMESTAMP",
            "TIMESTAMP_WITHOUT_TIME_ZONE",
            "TIMESTAMP_LTZ",
            "TIMESTAMP_WITH_LOCAL_TIME_ZONE",
        ),
        TimestampConverter,
    ),
    # Interval types
    **dict.fromkeys(("INTERVAL_DAY_TIME", "INTERVAL DAYS TO SECOND"), DaysIntervalConverter),
    **dict.fromkeys(
        ("INTERVAL_YEAR_MONTH", "INTERVAL YEAR TO MONTH"), YearMonthIntervalConverter
    ),
    # String types
    **dict.fromkeys(("CHAR", "VARCHAR", "STRING"), StringConverter),
    # Binary types
    **dict.fromkeys(("VARBINARY", "BINARY", "BYTES"), VarBinaryConverter),
    # Array type
    "ARRAY": ArrayConverter,
    # Map type
    "MAP": MapConverter,
    # Multiset type
    "MULTISET": MultisetConverter,
    # Row type
    "ROW": RowConverter,
}
|
|
1484
|
+
|
|
1485
|
+
|
|
1486
|
+
# Lookup table from a Python value's type to the TypeConverter class used to
# render it. get_converter_for_python_value() consults this with type(value).
_python_type_to_type_converter: dict[type, type[TypeConverter]] = {
    # Nulls: plain None and the typed-NULL wrapper.
    NoneType: NullResultConverter,
    SqlNone: SqlNoneConverter,
    # Scalars.
    bool: BooleanConverter,
    int: IntegerConverter,
    Decimal: DecimalConverter,
    float: FloatConverter,
    # Temporal and textual / binary values.
    date: DateConverter,
    time: TimeConverter,
    str: StringConverter,
    bytes: VarBinaryConverter,
    datetime: TimestampConverter,
    # Intervals.
    YearMonthInterval: YearMonthIntervalConverter,
    timedelta: DaysIntervalConverter,
    # Collections.
    list: ArrayConverter,
    dict: MapConverter,
    Counter: MultisetConverter,
    # Plain tuples map to ROW; namedtuple subclasses have a different exact type
    # and are recognised separately through RowConverter.handles_python_value().
    tuple: RowConverter,
}
|
|
1505
|
+
|
|
1506
|
+
# Union of the Python types listed as keys of the Python-type-to-converter map;
# used to annotate values accepted as statement parameters.
SupportedPythonTypes: TypeAlias = (
    NoneType
    | SqlNone
    | bool
    | int
    | Decimal
    | float
    | date
    | time
    | str
    | bytes
    | datetime
    | YearMonthInterval
    | timedelta
    | list
    | dict
    | Counter
    | tuple
)
|
|
1525
|
+
|
|
1526
|
+
|
|
1527
|
+
# Populate the predefined typed-NULL members on SqlNone for the common types.
# This has to run after the SqlNone class definition and after the global type
# maps above have been defined.
for _member_name, _flink_type_name in (
    ("INTEGER", "INTEGER"),
    ("VARCHAR", "VARCHAR"),
    ("STRING", "STRING"),
    ("VARBINARY", "VARBINARY"),
    ("BOOLEAN", "BOOLEAN"),
    ("DECIMAL", "DECIMAL"),
    ("FLOAT", "FLOAT"),
    ("DATE", "DATE"),
    ("TIME", "TIME"),
    ("TIMESTAMP", "TIMESTAMP"),
    ("YEAR_MONTH_INTERVAL", "INTERVAL YEAR TO MONTH"),
    ("DAY_SECOND_INTERVAL", "INTERVAL DAYS TO SECOND"),
):
    setattr(SqlNone, _member_name, SqlNone(_flink_type_name))

# Do not leave the loop variables behind in the module namespace.
del _member_name, _flink_type_name
|
|
1541
|
+
|
|
1542
|
+
|
|
1543
|
+
def get_converter_for_python_value(python_value: SupportedPythonTypes) -> type[TypeConverter]:
    """Pick the TypeConverter class able to render ``python_value``.

    Intended to be called right before ``converter_class.to_statement_string()``.

    Returns:
        The converter class registered for the value's exact type, or
        ``RowConverter`` for row-like values (namedtuple, NamedTuple,
        dataclass instance) that are not registered by exact type.

    Raises:
        InterfaceError: If no converter handles the value's type.
    """
    exact_type = type(python_value)

    # Direct hit covers most values, including a plain tuple destined for a ROW.
    registered = _python_type_to_type_converter.get(exact_type)
    if registered:
        return registered

    # namedtuples are duck-typed tuple subclasses and dataclasses are arbitrary
    # classes, so neither is found by exact type; ask RowConverter instead.
    if RowConverter.handles_python_value(python_value):
        return RowConverter

    raise InterfaceError(f"Conversion for parameter of type {exact_type} is not implemented.")
|
|
1565
|
+
|
|
1566
|
+
|
|
1567
|
+
def convert_statement_parameters(
    parameters: tuple | list,
) -> tuple:
    """Render each Python parameter as its statement-string form.

    The result is meant to be interpolated into a statement containing ``%s``
    placeholders.

    Args:
        parameters: The Python values to render, in placeholder order.

    Returns:
        A tuple with one rendered string per input parameter, in the same order.

    Raises:
        InterfaceError: Propagated from ``get_converter_for_python_value()`` when
            a parameter's type is not supported.
    """
    rendered = []
    for param in parameters:
        converter_cls = get_converter_for_python_value(param)
        rendered.append(converter_cls.to_statement_string(param))
    return tuple(rendered)
|
|
1581
|
+
|
|
1582
|
+
|
|
1583
|
+
def determine_element_converter_cls(python_value: Iterable) -> type[TypeConverter]:
    """Choose the TypeConverter class for the elements of a Python collection.

    The collection is expected to hold elements of a single type, optionally
    mixed with None, and callers are expected to have checked that it is not
    empty. The decision is based on the first element that is not None.

    Returns:
        The TypeConverter class for the first non-None element.

    Raises:
        InterfaceError: If no non-None element is found, or if the element's
            type is not supported.
    """
    # Stops consuming the iterable at the first non-None element.
    first_non_null = next((item for item in python_value if item is not None), None)
    if first_non_null is None:
        raise InterfaceError("Cannot determine element type: all elements are None.")

    # Re-raise with an element-specific message, keeping the original as cause.
    try:
        return get_converter_for_python_value(first_non_null)
    except InterfaceError as exc:
        raise InterfaceError(
            f"Conversion for array element of type {type(first_non_null)} is not implemented."
        ) from exc
|