confluent-sql 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
confluent_sql/types.py ADDED
@@ -0,0 +1,1606 @@
1
+ """Type conversions between Flink statement API string serializations and python representations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import re
7
+ from collections import Counter
8
+ from collections.abc import Iterable
9
+ from dataclasses import dataclass, fields, is_dataclass
10
+ from datetime import date, datetime, time, timedelta, timezone
11
+ from decimal import Decimal
12
+ from math import isinf, isnan
13
+ from types import NoneType
14
+ from typing import TYPE_CHECKING, Any, ClassVar, Generic, Protocol, TypeAlias, TypeVar
15
+
16
+ from confluent_sql.exceptions import InterfaceError, TypeMismatchError
17
+
18
+ if TYPE_CHECKING:
19
+ from .connection import Connection
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
# Type variable: the Python-side value type a TypeConverter subclass produces/accepts.
PyType = TypeVar("PyType")
"""The data type of the Python value being converted to/from Flink SQL representation by
a TypeConverter subclass."""
# Type variable: the JSON-decoded value type a TypeConverter subclass receives.
ResponseType = TypeVar("ResponseType")
"""The data type of the from-response-API-JSON-encoded value being converted from
in to_python_value()."""


# Imported for annotations only; not available at runtime.
if TYPE_CHECKING:
    from .statement import Schema

# Public names of this module.
__all__ = [
    "ColumnTypeDefinition",
    "StrAnyDict",
    "StatementTypeConverter",
    "TypeConverter",
    "convert_statement_parameters",
    "SqlNone",
    "YearMonthInterval",
    "TypeMismatchError",
]

"""
Type conversion between the SQL results API and Python values, driven by the schema information
"""


# Scalar values as they appear in decoded response JSON.
FromResponseScalarTypes: TypeAlias = str | bool | None
"""Describes all possible scalar encoding types returned from from-response API calls."""

# Row types are fully recursive and come to us in JSON as a nested list.
FromResponseTypes: TypeAlias = FromResponseScalarTypes | list["FromResponseTypes"]
"""
Describes all possible encoding types returned from from-response API calls, including
nested row types.
"""

# Generic JSON-object shape used for response payload fragments.
StrAnyDict: TypeAlias = dict[str, Any]
62
+
63
+
64
@dataclass
class RowColumn:
    """One named member of a ROW column type.

    Mirrors the entries of statement.traits.schema.columns[].type.fields.
    It carries the same information as a top-level column, except that the
    API names the type-bearing key ``field_type`` here.
    """

    name: str
    field_type: ColumnTypeDefinition
    description: str | None = None

    @property
    def type(self) -> ColumnTypeDefinition:
        """Expose ``field_type`` under the attribute name a regular column uses."""
        return self.field_type

    @classmethod
    def from_response(cls, data: StrAnyDict) -> RowColumn:
        """Build a RowColumn from one decoded ``fields`` entry.

        Reads the required keys ``field_type`` and ``name`` (KeyError if
        absent) and the optional key ``description``.
        """
        # Parse the nested type first, matching the lookup order of the keys.
        parsed_type = ColumnTypeDefinition.from_response(data["field_type"])
        member_name = data["name"]
        return cls(member_name, parsed_type, data.get("description"))
84
+
85
+
86
+ @dataclass(kw_only=True)
87
+ class ColumnTypeDefinition:
88
+ """Fields corresponding to statement.traits.schema.columns[].type members.
89
+
90
+ Describes the Flink-side type definition of a projected column.
91
+ """
92
+
93
+ type: str
94
+ """Flink name of the type, e.g., "INT", "STRING", "ROW", etc."""
95
+ nullable: bool
96
+ length: int | None = None
97
+ precision: int | None = None
98
+ scale: int | None = None
99
+ fractional_precision: int | None = None # if an interval type
100
+ resolution: str | None = None # if an interval type
101
+ key_type: ColumnTypeDefinition | None = None # if type == "MAP"
102
+ value_type: ColumnTypeDefinition | None = None # if type == "MAP"
103
+ element_type: ColumnTypeDefinition | None = None # if type == "ARRAY" or "MULTISET"
104
+
105
+ fields: list[RowColumn] | None = None
106
+ """The interior fields of a ROW type, if applicable."""
107
+
108
+ class_name: str | None = None
109
+ """The Flink-side class name of the structured data type (if applicable)."""
110
+
111
+ @property
112
+ def type_name(self) -> str:
113
+ """Return the Flink type name. Aliasing for clarity."""
114
+ return self.type
115
+
116
+ @classmethod
117
+ def from_response(cls, data: StrAnyDict) -> ColumnTypeDefinition:
118
+ """Create a ColumnTypeDefinition from JSON response data within from-API statement traits"""
119
+
120
+ element_type = key_type = value_type = None
121
+
122
+ column_type = data["type"]
123
+
124
+ if column_type in {"ARRAY", "MULTISET"}:
125
+ element_type = data.get("element_type")
126
+ if element_type is not None:
127
+ # Describes the element type of an ARRAY or a MULTISET.
128
+ # Promote from element type dict to a ColumnTypeDefinition
129
+ element_type = cls.from_response(data["element_type"])
130
+
131
+ elif column_type == "MAP":
132
+ # For MAP types, we need to parse key_type and value_type specially.
133
+ key_type = cls.from_response(data["key_type"])
134
+ value_type = cls.from_response(data["value_type"])
135
+
136
+ return cls(
137
+ type=column_type,
138
+ nullable=data["nullable"],
139
+ length=data.get("length"),
140
+ precision=data.get("precision"),
141
+ scale=data.get("scale"),
142
+ fractional_precision=data.get("fractional_precision"),
143
+ resolution=data.get("resolution"),
144
+ key_type=key_type,
145
+ value_type=value_type,
146
+ element_type=element_type,
147
+ fields=[RowColumn.from_response(field) for field in data.get("fields", [])]
148
+ if "fields" in data
149
+ else None,
150
+ class_name=data.get("class_name"),
151
+ )
152
+
153
+
154
+ class StatementTypeConverter:
155
+ """
156
+ Acts on behalf of a statement's Schema to convert from-API-JSON-changelog values to Python,
157
+ values. Drives per-column TypeConverter deserialization to python types based on the schema.
158
+ """
159
+
160
+ _schema: Schema
161
+ _type_converters: list[TypeConverter]
162
+
163
+ def __init__(self, connection: Connection, schema: Schema):
164
+ self._schema = schema
165
+ self._type_converters = [
166
+ get_api_type_converter(connection, col.type) for col in schema.columns
167
+ ]
168
+
169
+ def to_python_row(self, sql_row: list[FromResponseTypes]) -> tuple[SupportedPythonTypes, ...]:
170
+ """Convert a SQL row (list of from-results-API encoded values) to a Python row
171
+ (tuple of Python values) to be returned by a Cursor."""
172
+ return tuple(
173
+ converter.to_python_value(sql_value) # type: ignore[arg-type]
174
+ for converter, sql_value in zip(self._type_converters, sql_row, strict=True)
175
+ )
176
+
177
+
178
class TypeConverter(Generic[PyType, ResponseType]):
    """Common base for every Flink <-> Python value converter.

    Each subclass covers one Flink SQL type (or family of types) and
    translates in two directions:

    * ``to_python_value()`` (instance method) turns the JSON-decoded value
      from a statement response into the Python value. The instance keeps
      the ColumnTypeDefinition it was built with, so subclasses can consult
      the schema's description of the column.
    * ``to_statement_string()`` (class method) renders a Python value as
      text suitable for interpolation into a statement.

    ``PyType`` is the Python type handled: the return type of
    ``to_python_value()`` (besides None for nullable columns) and the
    parameter type of ``to_statement_string()``.

    ``ResponseType`` is the JSON-decoded type accepted by
    ``to_python_value()`` (besides None for nullable columns).
    """

    PRIMARY_FLINK_TYPE_NAME: str
    """The primary Flink SQL type name that this TypeConverter handles."""

    _column_type: ColumnTypeDefinition

    def __init__(self, connection: Connection, column_type: ColumnTypeDefinition):
        """Remember the connection and the column's type definition."""
        self._connection = connection
        self._column_type = column_type

    def to_python_value(self, response_value: ResponseType | None) -> PyType | None:
        """Translate a response-JSON value into its Python value.

        Any column may be nullable, so implementations return None for None.
        """
        raise NotImplementedError("Subclasses should implement this method.")  # pragma: no cover

    @classmethod
    def to_statement_string(cls, python_value: PyType) -> str:
        """Render a Python value as text for interpolation into a statement."""
        raise NotImplementedError("Subclasses should implement this method.")  # pragma: no cover

    def _check_to_python_param_type(
        self,
        expected_type: type[ResponseType],
        value: Any,
    ) -> None:
        """Raise TypeMismatchError unless ``value`` is an instance of ``expected_type``."""
        if isinstance(value, expected_type):
            return
        raise TypeMismatchError(
            converter_name=type(self).__name__,
            method_name="to_python_value",
            expected_type=expected_type.__name__,
            bad_value=value,
        )

    @classmethod
    def _check_to_statement_string_param_type(
        cls,
        expected_type: type,
        value: Any,
    ) -> None:
        """Raise TypeMismatchError unless ``value`` is an instance of ``expected_type``."""
        if isinstance(value, expected_type):
            return
        raise TypeMismatchError(
            converter_name=cls.__name__,
            method_name="to_statement_string",
            expected_type=expected_type.__name__,
            bad_value=value,
        )
249
+
250
+
251
def get_api_type_converter(
    connection: Connection, column_type: ColumnTypeDefinition
) -> TypeConverter:
    """Instantiate the TypeConverter registered for ``column_type``'s Flink type name.

    Raises NotImplementedError when no converter class is registered for
    that type name.
    """
    converter_cls = _flink_type_name_to_converter_map.get(column_type.type_name)
    if converter_cls:
        return converter_cls(connection, column_type)

    # No mapping registered for this Flink type yet.
    raise NotImplementedError(f"TypeConverter for {column_type.type_name} is not implemented.")
262
+
263
+
264
class StringConverter(TypeConverter[str, str]):
    """Converter for the Flink character types CHAR, VARCHAR and STRING."""

    PRIMARY_FLINK_TYPE_NAME = "STRING"

    def to_python_value(self, response_value: str | None) -> str | None:
        """Pass a response string through unchanged; None stays None.

        Raises TypeMismatchError for any non-str, non-None value.
        """
        if response_value is not None:
            self._check_to_python_param_type(str, response_value)
        return response_value

    @classmethod
    def to_statement_string(cls, python_value: str) -> str:
        """Render a Python string as a single-quoted Flink SQL string literal.

        Quoting rules applied here:

        * Flink delimits string literals with single quotes only, and an
          embedded single quote is escaped by doubling it.
        * Backslash is an ordinary character in a Flink SQL string literal,
          so it is left alone.
        * Backticks delimit identifiers, not string literals, and need no
          escaping inside a literal.
        """
        cls._check_to_statement_string_param_type(str, python_value)

        # Normalise str subclasses to a plain str so that the replace below
        # cannot be intercepted by an overridden method.
        plain = str(python_value)
        doubled = plain.replace("'", "''")
        return "'" + doubled + "'"
307
+
308
+
309
class VarBinaryConverter(TypeConverter[bytes, str]):
    """Handles Flink type VARBINARY"""

    PRIMARY_FLINK_TYPE_NAME = "VARBINARY"

    def to_python_value(self, response_value: str | None) -> bytes | None:
        r"""Decode a hex-pair encoded string from the response into bytes.

        Examples: "x'7f0203'" <-> b"\x7f\x02\x03"

        Args:
            response_value: Text of the form ``x'<hex pairs>'``, or None.

        Returns:
            The decoded bytes, or None when given None.

        Raises:
            TypeMismatchError: If the value is neither None nor a str.
            ValueError: If the ``x'`` prefix or closing quote is missing, or
                the payload is not valid hexadecimal.
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        # The shortest well-formed value is "x''" (three characters). The
        # length guard stops the lone prefix "x'" from satisfying both the
        # startswith and endswith tests with the same quote character.
        well_formed = (
            len(response_value) >= 3
            and response_value.startswith("x'")
            and response_value.endswith("'")
        )
        if not well_formed:
            raise ValueError(
                f"Expected hex-pair encoded string starting with x' and ending with ' "
                f"for VarBinaryConverter but got {response_value}"
            )

        hex_string = response_value[2:-1]  # Strip off the x' and trailing '
        try:
            return bytes.fromhex(hex_string)
        except ValueError as e:
            raise ValueError(f"Invalid hex string for VarBinaryConverter: {hex_string}") from e

    @classmethod
    def to_statement_string(cls, python_value: bytes) -> str:
        r"""Render Python bytes as a Flink hex literal for statement interpolation.

        Examples: b"\x7f\x02\x03" -> "x'7f0203'"

        Raises:
            TypeMismatchError: If ``python_value`` is not a bytes instance.
        """
        cls._check_to_statement_string_param_type(bytes, python_value)

        hex_string = python_value.hex()
        return f"x'{hex_string}'"
348
+
349
+
350
class IntegerConverter(TypeConverter[int, str]):
    """Converter between Python int and the Flink integer types
    TINYINT, SMALLINT, INTEGER and BIGINT."""

    PRIMARY_FLINK_TYPE_NAME = "INTEGER"

    def to_python_value(self, response_value: str | None) -> int | None:
        """Parse a string-encoded integer from the response; None stays None.

        Raises TypeMismatchError for a non-str value and ValueError when the
        text is not a valid integer.
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)
        parsed = int(response_value)
        return parsed

    @classmethod
    def to_statement_string(cls, python_value: int) -> str:
        """Render a Python int as a bare, unquoted integer literal."""
        cls._check_to_statement_string_param_type(int, python_value)

        # Re-cast through int first so that int subclasses with a custom
        # string form still render as plain digits.
        as_plain_int = int(python_value)
        return str(as_plain_int)
375
+
376
+
377
class DecimalConverter(TypeConverter[Decimal, str]):
    """Handle fixed precision DECIMAL types, mapping to/from Python's decimal.Decimal"""

    PRIMARY_FLINK_TYPE_NAME = "DECIMAL"

    def to_python_value(self, response_value: str | None) -> Decimal | None:
        """Parse a string-encoded decimal from the response into a Decimal.

        Returns None for None. Raises TypeMismatchError for a non-str value;
        an unparsable string raises from the Decimal constructor.
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        return Decimal(response_value)

    @classmethod
    def to_statement_string(cls, python_value: Decimal) -> str:
        """Render a Python Decimal as an explicitly cast Flink DECIMAL expression.

        The literal is always wrapped in ``cast('...' as decimal(p,s))``:

        * without the cast Flink may interpret the literal as a DOUBLE;
        * without precision and scale the fractional part is not honored.

        Precision and scale are derived from the value itself so that the
        declared type can always hold it, with scale never exceeding
        precision (e.g. ``0.001`` -> ``decimal(3,3)``) and never negative
        (e.g. ``1E+2`` -> ``'100'`` as ``decimal(3,0)``).

        Raises:
            TypeMismatchError: If ``python_value`` is not a Decimal.
            ValueError: If ``python_value`` is NaN or infinite.
        """
        cls._check_to_statement_string_param_type(Decimal, python_value)

        # NaN / Infinity carry a non-numeric exponent and have no DECIMAL form.
        if not python_value.is_finite():
            raise ValueError("Cannot convert NaN or Infinity to a Flink SQL decimal literal")

        _sign, digits, exponent = python_value.as_tuple()

        if exponent >= 0:
            # Integral value, possibly written in scientific form: the
            # exponent contributes trailing integer digits, not a scale.
            scale = 0
            precision = len(digits) + exponent
        else:
            scale = -exponent
            # Values such as 0.001 have fewer significant digits than
            # fractional places; precision must be at least the scale.
            precision = max(len(digits), scale)

        # NOTE(review): precision is not capped here; confirm how values
        # needing more digits than Flink's DECIMAL maximum should be handled.

        # Fixed-point formatting keeps scientific notation out of the literal.
        return f"cast('{python_value:f}' as decimal({precision},{scale}))"
409
+
410
+
411
class FloatConverter(TypeConverter[float, str]):
    """Converter between Python float and the Flink types FLOAT and DOUBLE."""

    PRIMARY_FLINK_TYPE_NAME = "DOUBLE"

    # Spellings used in response strings for the non-finite values.
    _transcendental_spellings = {
        "NaN": float("nan"),
        "Infinity": float("inf"),
        "-Infinity": float("-inf"),
    }

    def to_python_value(self, response_value: str | None) -> float | None:
        """Parse a string-encoded float from the response; None stays None.

        The spellings "NaN", "Infinity" and "-Infinity" map to the matching
        Python float values; everything else goes through ``float()``.
        Raises TypeMismatchError for a non-str value and ValueError for
        unparsable text.
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        special_values = self._transcendental_spellings
        if response_value in special_values:
            return special_values[response_value]

        # An ordinary number: parse normally.
        return float(response_value)

    @classmethod
    def to_statement_string(cls, python_value: float) -> str:
        """Render a Python float as a bare numeric literal (no quotes).

        NaN and the infinities are valid IEEE 754 floats but are rejected
        here with ValueError, because they cannot be written as a Flink SQL
        numeric literal.
        """
        cls._check_to_statement_string_param_type(float, python_value)

        is_non_finite = isnan(python_value) or isinf(python_value)
        if is_non_finite:
            raise ValueError("Cannot convert NaN or Infinity to a Flink SQL float/double literal")

        literal = str(python_value)
        return literal
457
+
458
+
459
class BooleanConverter(TypeConverter[bool, str]):
    """Handles Flink type BOOLEAN to/from Python bool"""

    PRIMARY_FLINK_TYPE_NAME = "BOOLEAN"

    def to_python_value(self, response_value: str | None) -> bool | None:
        """Parse the string 'TRUE'/'FALSE' (any letter case) from the response.

        Returns:
            True or False, or None when given None.

        Raises:
            TypeMismatchError: If the value is neither None nor a str.
            ValueError: If the string is neither "true" nor "false"
                (compared case-insensitively).
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        normalized = response_value.lower()
        if normalized == "true":
            return True
        if normalized == "false":
            return False

        # Reject anything else rather than silently mapping it to False.
        raise ValueError(f"Invalid boolean string for BooleanConverter: {response_value}")

    @classmethod
    def to_statement_string(cls, python_value: bool) -> str:
        """Render a Python bool as the unquoted keyword TRUE or FALSE.

        Raises:
            TypeMismatchError: If ``python_value`` is not a bool.
        """
        cls._check_to_statement_string_param_type(bool, python_value)
        return "TRUE" if python_value else "FALSE"
480
+
481
+
482
class SqlNone:
    """Marker for a statement parameter that is a NULL of a specific SQL type.

    Flink SQL (at the time this was written) does not accept a bare NULL
    literal in statements, so a NULL has to be cast to a concrete type.
    ``str()`` of an instance yields that cast expression.
    """

    # Ready-made NULLs for common types; assigned at the end of the module.
    INTEGER: SqlNone
    VARCHAR: SqlNone
    STRING: SqlNone
    BOOLEAN: SqlNone
    DECIMAL: SqlNone
    FLOAT: SqlNone
    DATE: SqlNone
    TIME: SqlNone
    TIMESTAMP: SqlNone
    VARBINARY: SqlNone
    YEAR_MONTH_INTERVAL: SqlNone
    DAY_SECOND_INTERVAL: SqlNone

    _known_types_regex: re.Pattern | None = None
    """Compiled regex pattern for known Flink type names, for validation."""
    # (Built lazily from the keys of _flink_type_name_to_converter_map.)

    _parameterized_type_regex = re.compile(r"^(?:ARRAY|MAP|MULTISET|ROW)\b", re.IGNORECASE)
    """Compiled regex pattern for parameterized Flink type names."""

    def __init__(self, python_or_flink_type: str | type):
        """Create a typed NULL from a Flink type name or a Python type.

        A str is validated case-insensitively: it must either equal one of
        the registered converter type names or start with ARRAY, MAP,
        MULTISET or ROW. Any other object is looked up as a Python type and
        replaced by its converter's primary Flink type name.

        Raises InterfaceError when the name or type is not recognised.
        """
        if not isinstance(python_or_flink_type, str):
            # A Python type: translate it to the matching Flink type name.
            converter_cls = _python_type_to_type_converter.get(python_or_flink_type)
            if not converter_cls:
                raise InterfaceError(
                    f"Cannot determine Flink SQL type name for Python type {python_or_flink_type}"
                )
            self._flink_type_name = converter_cls.PRIMARY_FLINK_TYPE_NAME
            return

        # A Flink type name given directly: validate it.
        known_pattern = SqlNone._known_types_regex
        if known_pattern is None:
            # First use: compile the alternation of registered type names
            # and cache it on the class.
            alternatives = "|".join(re.escape(t) for t in _flink_type_name_to_converter_map)
            known_pattern = re.compile(r"^(?:" + alternatives + r")$", re.IGNORECASE)
            SqlNone._known_types_regex = known_pattern

        recognised = known_pattern.match(
            python_or_flink_type
        ) or SqlNone._parameterized_type_regex.match(python_or_flink_type)
        if not recognised:
            raise InterfaceError(f"Unknown Flink type name {python_or_flink_type}")

        # Accepted: keep the caller's spelling as is.
        self._flink_type_name = python_or_flink_type

    def __str__(self) -> str:
        """Return the SQL expression casting NULL to the stored type name."""
        return "cast (null as " + f"{self._flink_type_name}" + ")"
548
+
549
+
550
class NullResultConverter(TypeConverter[NoneType, NoneType]):
    """Handles Flink NULL values to Python None. Only handles from
    results -> Python None conversion"""

    PRIMARY_FLINK_TYPE_NAME = "NULL"

    def to_python_value(self, response_value: NoneType) -> None:
        """Expect None from the response value and return None.

        Raises:
            TypeMismatchError: If ``response_value`` is not None.
        """
        self._check_to_python_param_type(NoneType, response_value)

        return None  # noqa: PLR1711 # explicit return for clarity.

    @classmethod
    def to_statement_string(cls, python_value: NoneType) -> str:
        """Always raise: a bare None has no typed NULL representation.

        Raises:
            InterfaceError: Unconditionally; callers must pass a SqlNone
                to state which SQL type the NULL should be cast to.
        """
        raise InterfaceError(
            "NullResultConverter cannot convert Python None to statement string directly. "
            "Use SqlNone to specify the desired SQL type for NULL parameters."
        )
567
+
568
+
569
class SqlNoneConverter(TypeConverter[SqlNone, NoneType]):
    """One-way converter rendering a SqlNone parameter as a typed SQL NULL."""

    # A value is required here, but this converter never maps a Flink
    # result type back to Python (the reverse of NullResultConverter), so
    # the name is left empty and the class is not registered in
    # _flink_type_name_to_converter_map.
    PRIMARY_FLINK_TYPE_NAME = ""

    def to_python_value(self, response_value: NoneType) -> None:
        """Unsupported direction; always raises InterfaceError."""
        raise InterfaceError(
            "SqlNoneConverter cannot convert from response values to Python. "
            "It is only for converting SqlNone parameters to SQL NULL strings."
        )

    @classmethod
    def to_statement_string(cls, python_value: SqlNone) -> str:
        """Return the cast-to-type NULL expression for a SqlNone instance.

        Raises TypeMismatchError if ``python_value`` is not a SqlNone.
        """
        cls._check_to_statement_string_param_type(SqlNone, python_value)
        # str() of a SqlNone already includes the cast to its embedded type.
        rendered = str(python_value)
        return rendered
594
+
595
+
596
class DateConverter(TypeConverter[date, str]):
    """Handles Flink DATE type to Python datetime.date"""

    PRIMARY_FLINK_TYPE_NAME = "DATE"

    def to_python_value(self, response_value: str | None) -> date | None:
        """Parse an ISO-format date string from the response into a datetime.date.

        Returns:
            The parsed date, or None when given None.

        Raises:
            TypeMismatchError: If the value is neither None nor a str.
            ValueError: If the string cannot be parsed.
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        try:
            # Use a distinct local name so the imported `date` class is not
            # shadowed inside this method.
            parsed = datetime.fromisoformat(response_value).date()
        except ValueError as e:
            raise ValueError(f"Invalid date string for DateConverter: {response_value}") from e

        return parsed

    @classmethod
    def to_statement_string(cls, python_value: date) -> str:
        """Render a Python datetime.date as a ``DATE '<isoformat>'`` literal.

        Raises:
            TypeMismatchError: If ``python_value`` is not a date instance.
        """
        # NOTE(review): datetime.datetime is a subclass of date and so passes
        # this check; its isoformat() includes a time component. Confirm that
        # callers never route datetimes here.
        cls._check_to_statement_string_param_type(date, python_value)

        # Our use cases need the prefixed 'DATE' keyword, so include it here.
        return f"DATE '{python_value.isoformat()}'"
624
+
625
+
626
class TimeConverter(TypeConverter[time, str]):
    """Converter between the Flink TIME type and Python datetime.time."""

    PRIMARY_FLINK_TYPE_NAME = "TIME"

    def to_python_value(self, response_value: str | None) -> time | None:
        """Parse an ISO-format time string from the response; None stays None.

        Raises TypeMismatchError for a non-str value and ValueError when the
        text cannot be parsed by ``time.fromisoformat``.
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        try:
            parsed = time.fromisoformat(response_value)
        except Exception as e:
            raise ValueError(f"Invalid time string for TimeConverter: {response_value}") from e
        return parsed

    @classmethod
    def to_statement_string(cls, python_value: time) -> str:
        """Render a Python datetime.time as a ``TIME '<isoformat>'`` literal,
        always including microseconds.

        Raises TypeMismatchError if ``python_value`` is not a time instance.
        """
        cls._check_to_statement_string_param_type(time, python_value)

        iso_text = python_value.isoformat(timespec="microseconds")
        return f"TIME '{iso_text}'"
652
+
653
+
654
class TimestampConverter(TypeConverter[datetime, str]):
    """Converter between Python datetime.datetime (with or without tzinfo)
    and the Flink TIMESTAMP / TIMESTAMP_LTZ types.

    Python -> statement text:
        A tz-aware datetime is shifted to UTC, stripped of its tzinfo and
        cast as ``timestamp_ltz``; a tz-naive datetime is cast as
        ``timestamp``.

    Response -> Python:
        For TIMESTAMP_WITH_LOCAL_TIME_ZONE columns the parsed value is
        tagged with tzinfo=UTC; otherwise it stays tz-naive.

    Round-tripping a tz-aware datetime through TIMESTAMP_LTZ therefore
    keeps the instant in time but replaces a non-UTC tzinfo with UTC.
    Use tz-naive datetimes for TIMESTAMP columns and tz-aware datetimes for
    TIMESTAMP_LTZ columns.
    """

    PRIMARY_FLINK_TYPE_NAME = "TIMESTAMP"

    def __init__(self, connection: Connection, column_type: ColumnTypeDefinition):
        """Accept only the two canonical timestamp type names.

        Raises ValueError for any other type name (including aliases).
        """
        accepted_names = (
            "TIMESTAMP_WITHOUT_TIME_ZONE",
            "TIMESTAMP_WITH_LOCAL_TIME_ZONE",
        )
        if column_type.type_name not in accepted_names:
            raise ValueError(
                f"TimestampConverter can only be used with TIMESTAMP_WITHOUT_TIME_ZONE or"
                f" TIMESTAMP_WITH_LOCAL_TIME_ZONE types, got {column_type.type_name}"
            )
        super().__init__(connection, column_type)

    @classmethod
    def to_statement_string(cls, python_value: datetime) -> str:
        """Render a datetime as a cast expression whose target type depends
        on whether the value carries tzinfo.

        Raises TypeMismatchError if ``python_value`` is not a datetime.
        """
        cls._check_to_statement_string_param_type(datetime, python_value)

        if python_value.tzinfo is None:
            flink_type = "timestamp"
        else:
            # Express the instant in UTC, then drop the tzinfo so that the
            # rendered text carries no offset.
            in_utc = python_value.astimezone(tz=timezone.utc)
            python_value = in_utc.replace(tzinfo=None)
            flink_type = "timestamp_ltz"

        iso_str = python_value.isoformat(sep=" ", timespec="microseconds")
        return f"cast('{iso_str}' as {flink_type})"

    def to_python_value(self, response_value: str | None) -> datetime | None:
        """Parse an ISO-format timestamp string from the response.

        Returns None for None. The result is tz-aware (UTC) for
        TIMESTAMP_WITH_LOCAL_TIME_ZONE columns and tz-naive otherwise.

        Raises TypeMismatchError for a non-str value, and ValueError when
        the text cannot be parsed or carries a timezone offset.
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        try:
            parsed = datetime.fromisoformat(response_value)
        except Exception as e:
            raise ValueError(
                f"Invalid timestamp string for TimestampConverter: {response_value}"
            ) from e

        # Only offset-free text is expected; the UTC tagging below would be
        # wrong for a value that already carries an offset.
        if parsed.tzinfo is not None:
            raise ValueError(
                f"Expected timezone-naive timestamp string from Flink but got {response_value}"
            )

        if self._column_type.type_name != "TIMESTAMP_WITH_LOCAL_TIME_ZONE":
            return parsed

        # TIMESTAMP_LTZ values are interpreted as UTC.
        return parsed.replace(tzinfo=timezone.utc)
740
+
741
+
742
+ @dataclass
743
+ class YearMonthInterval:
744
+ """Class representing a Flink YEAR TO MONTH interval with separate year and month components.
745
+
746
+ Negative intervals have negative years and/or months. When the years is negative,
747
+ the months should also be negative, and vice versa (so as to avoid ambiguity and to
748
+ represent negative months-only intervals). The smallest magnitude negative interval is
749
+ therefore 0 years and -1 month. When either years or months is non-positive, both will be,
750
+ and vice versa for positive intervals. Property `is_negative` can be used to check the sign.
751
+
752
+ (This differs from Python's timedelta, which represents less than one negative day
753
+ intervals by having negative days and positive seconds/microseconds, which, when
754
+ added together, end up at the right negative point in time (that is, not having
755
+ a zero days component when the total interval is negative but less than one day).)
756
+
757
+ The string representation is of the form '+-Y-M', with a leading '+' or '-' sign,
758
+ followed by the absolute value of years, a hyphen, and the absolute value of months
759
+ zero-padded to two digits.
760
+ """
761
+
762
+ years: int
763
+ months: int
764
+
765
+ def __post_init__(self):
766
+ if not isinstance(self.years, int) or not isinstance(self.months, int):
767
+ raise TypeError("YearMonthInterval years and months must be integers.")
768
+
769
+ if (self.years < 0 and self.months > 0) or (self.years > 0 and self.months < 0):
770
+ raise ValueError("YearMonthInterval years and months must have the same sign.")
771
+
772
+ if abs(self.months) >= 12:
773
+ raise ValueError("YearMonthInterval months must be in the range -11 to 11.")
774
+
775
+ if abs(self.years) > 9999:
776
+ raise ValueError("YearMonthInterval years must be in the range -9999 to 9999")
777
+
778
+ @property
779
+ def is_negative(self) -> bool:
780
+ """Return True if the interval is negative, False otherwise."""
781
+ return self.years < 0 or self.months < 0
782
+
783
+ def __str__(self) -> str:
784
+ sign = "-" if (self.years < 0 or self.months < 0) else "+"
785
+ return f"{sign}{abs(self.years)}-{abs(self.months):02d}"
786
+
787
+ # Rich comparison methods for vague parity with timedelta
788
+ def __lt__(self, other: Any) -> bool:
789
+ if not isinstance(other, YearMonthInterval):
790
+ return NotImplemented
791
+ return (self.years, self.months) < (other.years, other.months)
792
+
793
def __le__(self, other: Any) -> bool:
    """Order by ``(years, months)``; defer to ``other`` when it is not a YearMonthInterval."""
    if isinstance(other, YearMonthInterval):
        mine, theirs = (self.years, self.months), (other.years, other.months)
        return mine <= theirs
    return NotImplemented
797
+
798
def __eq__(self, other: Any) -> bool:
    """Equal when both ``years`` and ``months`` match; NotImplemented for other types."""
    if isinstance(other, YearMonthInterval):
        mine, theirs = (self.years, self.months), (other.years, other.months)
        return mine == theirs
    return NotImplemented
802
+
803
def __gt__(self, other: Any) -> bool:
    """Order by ``(years, months)``; defer to ``other`` when it is not a YearMonthInterval."""
    if isinstance(other, YearMonthInterval):
        mine, theirs = (self.years, self.months), (other.years, other.months)
        return mine > theirs
    return NotImplemented
807
+
808
def __ge__(self, other: Any) -> bool:
    """Order by ``(years, months)``; defer to ``other`` when it is not a YearMonthInterval."""
    if isinstance(other, YearMonthInterval):
        mine, theirs = (self.years, self.months), (other.years, other.months)
        return mine >= theirs
    return NotImplemented
812
+
813
def __ne__(self, other: Any) -> bool:
    """Unequal when ``years`` or ``months`` differ; NotImplemented for other types."""
    if isinstance(other, YearMonthInterval):
        mine, theirs = (self.years, self.months), (other.years, other.months)
        return mine != theirs
    return NotImplemented
817
+
818
def __hash__(self) -> int:
    """Hash the same (years, months) pair that __eq__ compares, keeping the two consistent."""
    identity = (self.years, self.months)
    return hash(identity)
821
+
822
+
823
class YearMonthIntervalConverter(TypeConverter[YearMonthInterval, str]):
    """Handles Flink YEAR TO MONTH variant INTERVAL types as strings.

    INTERVAL YEAR TO MONTH is mapped to the Python YearMonthInterval dataclass. Its string
    representation is of the form '<sign>Y-MM' (for example '+1-06' or '-0-03'), and the
    Flink schema type will be INTERVAL_YEAR_MONTH.
    """

    PRIMARY_FLINK_TYPE_NAME = "INTERVAL_YEAR_MONTH"

    # Mandatory sign, then the absolute years and months separated by a single hyphen.
    # Used with fullmatch(), so no anchors are needed.
    _YEAR_MONTH_RE = re.compile(r"(?P<sign>[+-])(?P<years>\d+)-(?P<months>\d+)")

    def __init__(self, connection: Connection, column_type: ColumnTypeDefinition):
        """Create a converter for an INTERVAL_YEAR_MONTH column.

        Raises:
            ValueError: if ``column_type`` is not an INTERVAL_YEAR_MONTH type.
        """
        if column_type.type_name != "INTERVAL_YEAR_MONTH":
            raise ValueError(
                f"YearMonthIntervalConverter can only be used with INTERVAL_YEAR_MONTH types, "
                f"got {column_type.type_name}"
            )
        super().__init__(connection, column_type)

    def to_python_value(self, response_value: str | None) -> YearMonthInterval | None:
        """Expect string-encoded interval or None from the response value,
        return as YearMonthInterval (or None) or raise ValueError.

        The whole string must be a '+' or '-' sign followed by '<years>-<months>' in
        decimal digits. Anything else (missing or unknown sign character, embedded
        whitespace, extra signs, out-of-range components) is rejected with ValueError
        rather than being silently interpreted.
        """

        # Example: '+1-06' for interval of 1 year, 6 months.
        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        try:
            match = self._YEAR_MONTH_RE.fullmatch(response_value)
            if match is None:
                raise ValueError(f"Invalid interval format: {response_value}")

            years = int(match.group("years"))
            months = int(match.group("months"))
            if match.group("sign") == "-":
                # Both components carry the sign of the interval as a whole.
                years, months = -years, -months

            # YearMonthInterval validates the component ranges itself.
            return YearMonthInterval(years=years, months=months)
        except (TypeError, ValueError) as e:
            raise ValueError(
                f"Invalid interval string for YearMonthIntervalConverter: {response_value}"
            ) from e

    @classmethod
    def to_statement_string(cls, python_value: YearMonthInterval) -> str:
        """Convert a Python YearMonthInterval value representing an interval to its
        for-statement-string-interpolation representation, e.g.
        "INTERVAL '+1-06' YEAR TO MONTH"."""
        cls._check_to_statement_string_param_type(YearMonthInterval, python_value)

        interval_str = str(python_value)
        return f"INTERVAL '{interval_str}' YEAR TO MONTH"
873
+
874
+
875
class DaysIntervalConverter(TypeConverter[timedelta, str]):
    """Handles Flink DAYS TO SECOND variant INTERVAL types as strings.

    INTERVAL DAY TO SECOND is mapped to Python timedelta. Its string representation
    is of the form '+-D HH:MM:SS.MMMMMM', and the Flink schema type will be
    INTERVAL_DAY_TIME.

    We have to take care when converting negative intervals carrying fractional
    seconds, since Python's timedelta normalizes negative timedeltas in a surprising way,
    expressing them with negative days and positive seconds/microseconds.
    """

    PRIMARY_FLINK_TYPE_NAME = "INTERVAL_DAY_TIME"

    _HOURS_TO_SECONDS_RE = re.compile(
        r"^(?P<sign>[+-])(?P<days>\d+)\s"
        r"(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"
        r"(?:\.(?P<micro>\d{1,6}))?$"
    )

    def to_python_value(self, response_value: str | None) -> timedelta | None:
        """Expect string-encoded interval or None from the response value,
        return as timedelta (or None) or raise ValueError."""

        # Examples:
        #   * '+0 04:00:00.000' (0 days, 4 hours)
        #   * '+1 12:30:45.123456' (positive days through to microseconds)
        #   * '-0 00:15:00' (negative 15 minutes, no fractional seconds)
        if response_value is None:
            return None

        self._check_to_python_param_type(str, response_value)

        try:
            m = self._HOURS_TO_SECONDS_RE.match(response_value)
            if not m:
                raise ValueError(f"Invalid interval format: {response_value}")

            # Right-pad the fraction so that e.g. '.5' means 500000 microseconds.
            micro_group = m.group("micro")
            microseconds = int(micro_group.ljust(6, "0")) if micro_group else 0

            # Build the magnitude as a positive timedelta first, then negate as a whole
            # so the sign applies to every component.
            td = timedelta(
                days=int(m.group("days")),
                hours=int(m.group("hours")),
                minutes=int(m.group("minutes")),
                seconds=int(m.group("seconds")),
                microseconds=microseconds,
            )
            return -td if m.group("sign") == "-" else td
        except Exception as e:
            raise ValueError(
                f"Invalid interval string for IntervalConverter: {response_value}"
            ) from e

    ZERO_TIMEDELTA = timedelta(0)

    @classmethod
    def to_statement_string(cls, python_value: timedelta) -> str:
        """Convert a Python timedelta value representing an interval to its
        for-statement-string-interpolation representation, e.g.
        "INTERVAL '+1 12:30:45.123456' DAY TO SECOND(6)".

        The "(6)" precision suffix is only emitted when the value carries microseconds.
        """
        cls._check_to_statement_string_param_type(timedelta, python_value)

        # If negative, convert to positive and remember sign to avoid negative timedelta
        # normalization quirks (python normalizes to negative days, positive
        # seconds/microseconds which add up to the right point on the timeline).
        if python_value < cls.ZERO_TIMEDELTA:
            python_value = -python_value
            sign = "-"
        else:
            sign = "+"

        # Read the exact integer components of the (now non-negative) timedelta.
        # Going through total_seconds() would use a float, which rounds for large
        # intervals and can carry a high microsecond part into the seconds field
        # (e.g. 200000 days 00:00:59.999999 would render as 00:01:00.999999).
        days = python_value.days
        hours, remainder = divmod(python_value.seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        microseconds = python_value.microseconds

        interval_str = f"{sign}{days} {hours:02}:{minutes:02}:{seconds:02}"
        if microseconds > 0:
            interval_str += f".{microseconds:06}"
            precision = "(6)"
        else:
            precision = ""

        return f"INTERVAL '{interval_str}' DAY TO SECOND{precision}"
970
+
971
+
972
class ArrayConverter(TypeConverter[list, list]):
    """Converts between the Flink ARRAY type and Python lists.

    Caveats:
    * Nested lists / arrays work, but an empty list cannot be rendered as a literal
      (empty array literals are not supported by Flink at this time).
    * None elements are rendered as SQL NULLs of the element type; a list containing
      only Nones is rejected because no element type can be derived from it.
    """

    PRIMARY_FLINK_TYPE_NAME = "ARRAY"

    _element_converter: TypeConverter
    """Type converter for array element type."""

    def __init__(self, connection: Connection, column_type: ColumnTypeDefinition):
        """Build the converter plus a nested converter for the array's element type.

        Raises:
            InterfaceError: if the column is not an ARRAY or carries no element type.
            TypeError: if no converter is registered for the element type.
        """
        type_name = column_type.type_name
        if type_name != "ARRAY":
            raise InterfaceError(
                f"ArrayConverter can only be used with ARRAY types, got {column_type.type_name}"
            )

        # The element type comes from the column type's own type parameters.
        element_type_def = column_type.element_type
        if not element_type_def:
            raise InterfaceError(
                "ArrayConverter cannot determine element type from column type definition."
            )

        converter_factory = _flink_type_name_to_converter_map.get(element_type_def.type_name)
        if not converter_factory:
            raise TypeError(
                f"Conversion for array element of type {element_type_def.type_name} is not"
                " implemented."
            )

        self._element_converter = converter_factory(connection, element_type_def)

        super().__init__(connection, column_type)

    def to_python_value(self, response_value: list | None) -> list | None:
        """Decode a from-response list element by element; None passes through as None."""
        if response_value is None:
            return None

        self._check_to_python_param_type(list, response_value)

        decode = self._element_converter.to_python_value
        return [decode(raw_element) for raw_element in response_value]

    @classmethod
    def to_statement_string(cls, python_value: list) -> str:
        """Render a Python list as an ``ARRAY[...]`` literal for statement interpolation.

        Raises:
            ValueError: if the list is empty.
        """
        cls._check_to_statement_string_param_type(list, python_value)

        if len(python_value) == 0:
            # Flink has no empty-array literal, and with no elements there would be no
            # way to name the element type anyway.
            raise ValueError("Cannot convert empty list to Flink ARRAY literal.")

        # One converter class, chosen from the first non-None element, serves every element.
        element_converter_cls = determine_element_converter_cls(python_value)
        null_literal = str(SqlNone(element_converter_cls.PRIMARY_FLINK_TYPE_NAME))

        # The element converter may raise if an individual element is of the wrong type.
        element_strings = [
            null_literal if element is None else element_converter_cls.to_statement_string(element)
            for element in python_value
        ]

        return f"ARRAY[{', '.join(element_strings)}]"
1055
+
1056
+
1057
class MapConverter(TypeConverter[dict, list]):
    """Converts between the Flink MAP type and Python dicts.

    Caveats:
    * Empty dicts cannot be rendered, since Flink does not support literal empty maps at
      this time.
    * Flink map keys must be of a type that is hashable in Python.
    * Dict keys and values may be None (rendered as typed SQL NULLs), but a dict whose keys
      are all None, or whose values are all None, is rejected because the key/value type
      cannot be determined.
    * Keys must be of one uniform type (or None), and likewise values, as Flink MAP types
      require.
    """

    PRIMARY_FLINK_TYPE_NAME = "MAP"

    key_converter: TypeConverter
    """Type converter for map key type."""
    value_converter: TypeConverter
    """Type converter for map value type."""

    def __init__(self, connection: Connection, column_type: ColumnTypeDefinition):
        """Build the converter plus nested converters for the key and value types.

        Raises:
            InterfaceError: if the column is not a MAP, or lacks a key or value type.
            TypeError: if no converter is registered for the key or value type.
        """
        type_name = column_type.type_name
        if type_name != "MAP":
            raise InterfaceError(
                f"MapConverter can only be used with MAP types, got {column_type.type_name}"
            )

        # Key and value types come from the column type's own type parameters.
        key_type_def, value_type_def = column_type.key_type, column_type.value_type
        if not key_type_def:
            raise InterfaceError(
                "MapConverter cannot determine key type from column type definition."
            )
        if not value_type_def:
            raise InterfaceError(
                "MapConverter cannot determine value type from column type definition."
            )

        key_factory = _flink_type_name_to_converter_map.get(key_type_def.type_name)
        if not key_factory:
            raise TypeError(
                f"Conversion for map key of type {key_type_def.type_name} is not implemented."
            )
        self.key_converter = key_factory(connection, key_type_def)

        value_factory = _flink_type_name_to_converter_map.get(value_type_def.type_name)
        if not value_factory:
            raise TypeError(
                f"Conversion for map value of type {value_type_def.type_name} is not implemented."
            )
        self.value_converter = value_factory(connection, value_type_def)

        super().__init__(connection, column_type)

    def to_python_value(self, response_value: list | None) -> dict | None:
        """Decode a from-response list of ``[key, value]`` pair lists into a dict.

        None passes through as None. Raises ValueError if an entry is not a two-item list.
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(list, response_value)

        # Shape: [[enc-key1, enc-value1], [enc-key2, enc-value2], ...], each side still in
        # the from-response encoding of its type.
        decoded: dict = {}
        for pair in response_value:
            well_formed = isinstance(pair, list) and len(pair) == 2
            if not well_formed:
                raise ValueError(
                    f"Expected key-value pair list of length 2 for MapConverter but got: {pair}"
                )

            raw_key, raw_value = pair
            key = self.key_converter.to_python_value(raw_key)
            value = self.value_converter.to_python_value(raw_value)
            decoded[key] = value

        return decoded

    @classmethod
    def to_statement_string(cls, python_value: dict) -> str:
        """Render a Python dict as a ``MAP[k1, v1, k2, v2, ...]`` literal.

        Example: MAP['key1', 12, 'key2', 22] for a map with string keys and integer values.

        Raises:
            ValueError: if the dict is empty.
        """
        cls._check_to_statement_string_param_type(dict, python_value)

        if len(python_value) == 0:
            # Flink has no empty-map literal.
            raise ValueError("Cannot convert empty dict to Flink MAP literal.")

        # One converter class for all keys and one for all values, each chosen from the
        # first non-None entry on its side.
        key_converter_cls = determine_element_converter_cls(python_value.keys())
        value_converter_cls = determine_element_converter_cls(python_value.values())

        null_key = str(SqlNone(key_converter_cls.PRIMARY_FLINK_TYPE_NAME))
        null_value = str(SqlNone(value_converter_cls.PRIMARY_FLINK_TYPE_NAME))

        # Flattened as key, value, key, value, ... ready to be comma-joined.
        keys_and_values: list[str] = []
        for key, value in python_value.items():
            # Either converter may raise if an individual key or value is of the wrong type.
            rendered_key = null_key if key is None else key_converter_cls.to_statement_string(key)
            keys_and_values.append(rendered_key)

            rendered_value = (
                null_value if value is None else value_converter_cls.to_statement_string(value)
            )
            keys_and_values.append(rendered_value)

        return f"MAP[{', '.join(keys_and_values)}]"
1182
+
1183
+
1184
class MultisetConverter(TypeConverter[Counter, list]):
    """Converts the Flink MULTISET type to Python's collections.Counter.

    A MULTISET behaves like a MAP from element to an integer count giving how many times
    the element occurs, which is what a Counter models.

    Conversion in the other direction is not available: ``to_statement_string`` always
    raises, because Flink has no MULTISET literal syntax.
    """

    PRIMARY_FLINK_TYPE_NAME = "MULTISET"

    element_converter: TypeConverter
    """Type converter for the multiset's element / key type."""

    int_converter: IntegerConverter
    """Integer converter for the counts portion of the multiset."""

    def __init__(self, connection: Connection, column_type: ColumnTypeDefinition):
        """Build the converter plus nested converters for elements and their counts.

        Raises:
            InterfaceError: if the column is not a MULTISET or carries no element type.
            TypeError: if no converter is registered for the element type.
        """
        type_name = column_type.type_name
        if type_name != "MULTISET":
            raise InterfaceError(
                f"MultisetConverter can only be used with MULTISET types, got {column_type.type_name}"  # noqa: E501
            )

        # The element type comes from the column type's own type parameters.
        element_type_def = column_type.element_type
        if not element_type_def:
            raise InterfaceError(
                "MultisetConverter cannot determine element type from column type definition."
            )

        converter_factory = _flink_type_name_to_converter_map.get(element_type_def.type_name)
        if not converter_factory:
            raise TypeError(
                f"Conversion for multiset element of type {element_type_def.type_name} is not implemented."  # noqa: E501
            )

        self.element_converter = converter_factory(connection, element_type_def)

        # Counts are always decoded as non-nullable INTEGERs.
        count_type = ColumnTypeDefinition(type="INTEGER", nullable=False)
        self.int_converter = IntegerConverter(connection, count_type)

        super().__init__(connection, column_type)

    def to_python_value(self, response_value: list | None) -> Counter | None:
        """Decode a from-response list of ``[element, count]`` pair lists into a Counter.

        None passes through as None. Raises InterfaceError if an entry is not a list, does
        not unpack into exactly two items, or decodes to a None element or None count.
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(list, response_value)

        tally: Counter = Counter()
        for pair in response_value:
            if not isinstance(pair, list):
                raise InterfaceError(
                    f"Expected to receive value+count list for MultisetConverter, but got {type(pair)} instead."  # noqa: E501
                )
            try:
                raw_element, raw_count = pair
            except Exception as e:
                raise InterfaceError(
                    f"Expected element + count pair list for MultisetConverter but got: {pair}"
                ) from e

            element = self.element_converter.to_python_value(raw_element)
            if element is None:
                raise InterfaceError("Expected element for MultisetConverter but got None")

            count = self.int_converter.to_python_value(raw_count)
            if count is None:
                raise InterfaceError("Expected integer count for MultisetConverter but got None")

            tally[element] = count

        return tally

    @classmethod
    def to_statement_string(cls, python_value: Counter) -> str:
        """Always raises InterfaceError: Flink has no literal MULTISET syntax."""
        raise InterfaceError("Flink does not currently support MULTISET literals.")
1268
+
1269
+
1270
class IsDataclass(Protocol):
    """Structural type matching @dataclass instances (no built-in protocol exists for this).

    Anything exposing a class-level ``__dataclass_fields__`` mapping satisfies it.
    """

    __dataclass_fields__: ClassVar[dict[str, Any]]
1274
+
1275
+
1276
RowPythonTypes = tuple | IsDataclass
"""Python representations accepted for Flink ROW column values: a tuple (which covers
namedtuple() and typing.NamedTuple classes) or an instance of a @dataclass."""
1279
+
1280
+
1281
class RowConverter(TypeConverter[RowPythonTypes, list]):
    """Converts the Flink ROW type to/from Python tuple, namedtuple, or @dataclass instances.

    Decoding a ROW yields an instance of the class that the connection's row class
    registry returns for the ROW's field names. Per the original notes, that class is
    cached by field names so equal-shaped ROWs share one class across converters, queries
    and cursors, and the registry creates a collections.namedtuple class when needed.

    Interpolating a tuple, namedtuple, or dataclass into a statement converts the values
    positionally, field by field, into "ROW(field1_value, field2_value, ...)" wrapped in
    an extra pair of parentheses.
    """

    PRIMARY_FLINK_TYPE_NAME = "ROW"

    _field_converters: list[TypeConverter]
    """List of TypeConverter instances for each field in the row, in order."""
    _field_names: list[str]
    """List of field names in the row, in order."""
    _python_value_class: type[RowPythonTypes]
    """The namedtuple or @dataclass class from the connection's row class registry
    corresponding to this row type's field names."""

    def __init__(self, connection: Connection, column_type: ColumnTypeDefinition):
        """Build the converter plus one nested converter per ROW field.

        Raises:
            InterfaceError: if the column is not a ROW, has no fields, or a field has no type.
            TypeError: if no converter is registered for a field's type.
        """
        type_name = column_type.type_name
        if type_name != "ROW":
            raise InterfaceError(
                f"RowConverter can only be used with ROW types, got {column_type.type_name}"
            )

        if not column_type.fields:
            raise InterfaceError("RowConverter requires column type definition with fields")

        names: list[str] = []
        converters: list[TypeConverter] = []
        self._field_names = names
        self._field_converters = converters

        for field_def in column_type.fields:
            field_name = field_def.name
            names.append(field_name)

            field_type_def = field_def.type
            if not field_type_def:
                raise InterfaceError(
                    f"RowConverter cannot determine type for field '{field_name}'."
                )

            converter_factory = _flink_type_name_to_converter_map.get(field_type_def.type_name)
            if not converter_factory:
                raise TypeError(
                    f"Conversion for row field '{field_name}' of type "
                    f"{field_type_def.type_name} is not implemented."
                )

            converters.append(converter_factory(connection, field_type_def))

        # Ask the connection's registry for the class matching these field names.
        self._python_value_class = connection._row_type_registry.get_row_class(self._field_names)

        super().__init__(connection, column_type)

    def to_python_value(self, response_value: list | None) -> RowPythonTypes | None:
        """Decode a from-response list into an instance of the registered row class.

        None passes through as None. Raises InterfaceError if the number of values does not
        match the number of fields, or if any field value fails to convert.
        """
        if response_value is None:
            return None

        self._check_to_python_param_type(list, response_value)

        expected_count = len(self._field_converters)
        if len(response_value) != expected_count:
            raise InterfaceError(
                f"Expected {len(self._field_converters)} fields for RowConverter but got "
                f"{len(response_value)}"
            )

        field_values = []
        triples = zip(self._field_names, self._field_converters, response_value, strict=True)
        for field_name, converter, raw_value in triples:
            # A field converter may reject the value (wrong type, out of range, ...);
            # re-raise naming the offending field.
            try:
                decoded = converter.to_python_value(raw_value)
            except Exception as e:
                raise InterfaceError(
                    f"Error converting field '{field_name}' value in RowConverter: {e}"
                ) from e

            field_values.append(decoded)

        # Field values are passed positionally to the registered class.
        return self._python_value_class(*field_values)

    @classmethod
    def handles_python_value(cls, python_value: Any) -> bool:
        """Tell whether ``python_value`` is a tuple (incl. namedtuple / typing.NamedTuple)
        or a @dataclass instance.

        Used by `get_converter_for_python_value()` to pick the converter class for a value.
        """
        # namedtuple and NamedTuple instances are tuples.
        if isinstance(python_value, tuple):
            return True

        # is_dataclass() is also true for dataclass *classes*; only instances qualify.
        return is_dataclass(python_value) and not isinstance(python_value, type)

    @classmethod
    def to_statement_string(cls, python_value: RowPythonTypes) -> str:
        """Render a tuple, collections.namedtuple, typing.NamedTuple, or @dataclass instance
        as "(ROW(field1_value, field2_value, ...))" for statement interpolation.

        Tuple-like values are read positionally; dataclass instances are read in field
        declaration order.

        (The outer parentheses are needed when the ROW is part of a larger expression,
        e.g., in an INSERT statement VALUES clause; without them strange parsing errors
        occur.)

        Raises:
            TypeMismatchError: if ``python_value`` is neither a tuple nor a dataclass instance.
        """
        value_as_tuple: tuple

        if isinstance(python_value, tuple):
            # Plain tuple, namedtuple, or NamedTuple subclass: already positional.
            value_as_tuple = python_value
        elif is_dataclass(python_value) and not isinstance(python_value, type):
            # Flatten the dataclass instance into its field values, in declaration order.
            value_as_tuple = tuple(
                getattr(python_value, spec.name) for spec in fields(python_value)
            )
        else:
            raise TypeMismatchError(
                converter_name=cls.__name__,
                method_name="to_statement_string",
                expected_type="tuple, namedtuple, NamedTuple, or dataclass",
                bad_value=python_value,
            )

        # get_converter_for_python_value() may raise InterfaceError for an unhandled type.
        field_strings: list[str] = [
            get_converter_for_python_value(field_value).to_statement_string(field_value)
            for field_value in value_as_tuple
        ]

        return f"(ROW({', '.join(field_strings)}))"
1432
+
1433
+
1434
# Flink type name -> converter class. Aliases of one type are grouped with
# dict.fromkeys(); keys, values and insertion order match a flat one-entry-per-line literal.
_flink_type_name_to_converter_map: dict[str, type[TypeConverter]] = {
    # Null and boolean
    "NULL": NullResultConverter,
    "BOOLEAN": BooleanConverter,
    # Integers of every width
    **dict.fromkeys(("TINYINT", "SMALLINT", "INTEGER", "BIGINT"), IntegerConverter),
    # Fixed precision
    **dict.fromkeys(("DECIMAL", "DEC", "NUMERIC"), DecimalConverter),
    # Floating point
    **dict.fromkeys(("FLOAT", "DOUBLE", "DOUBLE PRECISION"), FloatConverter),
    # Date, time and timestamp
    "DATE": DateConverter,
    **dict.fromkeys(("TIME", "TIME_WITHOUT_TIME_ZONE"), TimeConverter),
    **dict.fromkeys(
        (
            "TIMESTAMP",
            "TIMESTAMP_WITHOUT_TIME_ZONE",
            "TIMESTAMP_LTZ",
            "TIMESTAMP_WITH_LOCAL_TIME_ZONE",
        ),
        TimestampConverter,
    ),
    # Intervals
    **dict.fromkeys(("INTERVAL_DAY_TIME", "INTERVAL DAYS TO SECOND"), DaysIntervalConverter),
    **dict.fromkeys(
        ("INTERVAL_YEAR_MONTH", "INTERVAL YEAR TO MONTH"), YearMonthIntervalConverter
    ),
    # Character strings
    **dict.fromkeys(("CHAR", "VARCHAR", "STRING"), StringConverter),
    # Binary strings
    **dict.fromkeys(("VARBINARY", "BINARY", "BYTES"), VarBinaryConverter),
    # Collections and structured types
    "ARRAY": ArrayConverter,
    "MAP": MapConverter,
    "MULTISET": MultisetConverter,
    "ROW": RowConverter,
}
1484
+
1485
+
1486
# Exact Python type -> converter class used when interpolating a value into a statement.
_python_type_to_type_converter: dict[type, type[TypeConverter]] = {
    # NULLs: untyped None and the typed SqlNone marker
    NoneType: NullResultConverter,
    SqlNone: SqlNoneConverter,
    # Scalars
    bool: BooleanConverter,
    int: IntegerConverter,
    Decimal: DecimalConverter,
    float: FloatConverter,
    # Temporal values and text / binary strings
    date: DateConverter,
    time: TimeConverter,
    str: StringConverter,
    bytes: VarBinaryConverter,
    datetime: TimestampConverter,
    # Intervals
    YearMonthInterval: YearMonthIntervalConverter,
    timedelta: DaysIntervalConverter,
    # Collections and rows
    list: ArrayConverter,
    dict: MapConverter,
    Counter: MultisetConverter,
    tuple: RowConverter,  # namedtuples subclass tuple; they are matched separately
}
1505
+
1506
# Union of the Python types listed as keys of _python_type_to_type_converter.
SupportedPythonTypes: TypeAlias = (
    NoneType
    | SqlNone
    | bool
    | int
    | Decimal
    | float
    | date
    | time
    | str
    | bytes
    | datetime
    | YearMonthInterval
    | timedelta
    | list
    | dict
    | Counter
    | tuple
)
1525
+
1526
+
1527
# Typed-NULL constants for common types, attached to SqlNone as static members. This has
# to run after the SqlNone class and the global type maps above have been defined.

# Numeric and boolean
SqlNone.BOOLEAN = SqlNone("BOOLEAN")
SqlNone.INTEGER = SqlNone("INTEGER")
SqlNone.DECIMAL = SqlNone("DECIMAL")
SqlNone.FLOAT = SqlNone("FLOAT")

# Character and binary strings
SqlNone.VARCHAR = SqlNone("VARCHAR")
SqlNone.STRING = SqlNone("STRING")
SqlNone.VARBINARY = SqlNone("VARBINARY")

# Temporal types and intervals
SqlNone.DATE = SqlNone("DATE")
SqlNone.TIME = SqlNone("TIME")
SqlNone.TIMESTAMP = SqlNone("TIMESTAMP")
SqlNone.YEAR_MONTH_INTERVAL = SqlNone("INTERVAL YEAR TO MONTH")
SqlNone.DAY_SECOND_INTERVAL = SqlNone("INTERVAL DAYS TO SECOND")
1541
+
1542
+
1543
def get_converter_for_python_value(python_value: SupportedPythonTypes) -> type[TypeConverter]:
    """Return the TypeConverter class able to render ``python_value`` into a statement.

    Call this before invoking ``converter_class.to_statement_string()``.

    Raises:
        InterfaceError: if no converter handles the value's type.
    """
    value_type = type(python_value)

    # An exact-type lookup resolves most values, including a plain tuple destined for a ROW.
    direct_match = _python_type_to_type_converter.get(value_type)
    if direct_match:
        return direct_match

    # namedtuple / NamedTuple / dataclass instances have their own types, so they miss
    # the table and are recognised by RowConverter instead.
    if RowConverter.handles_python_value(python_value):
        return RowConverter

    raise InterfaceError(f"Conversion for parameter of type {value_type} is not implemented.")
1565
+
1566
+
1567
def convert_statement_parameters(
    parameters: tuple | list,
) -> tuple:
    """Render each Python parameter as the string to interpolate into a %s-laden statement.

    Returns: A tuple with one string representation per parameter, in the same order.

    Raises: InterfaceError (from get_converter_for_python_value()) if a parameter's type
    is not supported.
    """
    rendered: list = []
    for param in parameters:
        converter_cls = get_converter_for_python_value(param)
        rendered.append(converter_cls.to_statement_string(param))

    return tuple(rendered)
1581
+
1582
+
1583
def determine_element_converter_cls(python_value: Iterable) -> type[TypeConverter]:
    """Pick the TypeConverter class for the elements of a Python iterable.

    The elements are expected to be of one type, optionally mixed with Nones; only the
    first non-None element is inspected. Callers check for emptiness beforehand.

    Returns: The TypeConverter class for the type of the first non-None element.

    Raises: InterfaceError if every element is None (or there are none at all), or if the
    element type is not supported.
    """
    # None doubles as the "nothing found" marker because only non-None elements can match.
    first_present = next((item for item in python_value if item is not None), None)
    if first_present is None:
        raise InterfaceError("Cannot determine element type: all elements are None.")

    try:
        return get_converter_for_python_value(first_present)
    except InterfaceError as e:
        # Re-raise with a message phrased in terms of the element.
        raise InterfaceError(
            f"Conversion for array element of type {type(first_present)} is not implemented."
        ) from e