singlestoredb 1.12.3__cp38-abi3-win32.whl → 1.13.0__cp38-abi3-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of singlestoredb might be problematic. Click here for more details.
- _singlestoredb_accel.pyd +0 -0
- singlestoredb/__init__.py +1 -1
- singlestoredb/apps/__init__.py +1 -0
- singlestoredb/apps/_config.py +6 -0
- singlestoredb/apps/_connection_info.py +8 -0
- singlestoredb/apps/_python_udfs.py +85 -0
- singlestoredb/config.py +14 -2
- singlestoredb/functions/__init__.py +11 -1
- singlestoredb/functions/decorator.py +102 -252
- singlestoredb/functions/dtypes.py +545 -198
- singlestoredb/functions/ext/asgi.py +288 -90
- singlestoredb/functions/ext/json.py +29 -36
- singlestoredb/functions/ext/mmap.py +1 -1
- singlestoredb/functions/ext/rowdat_1.py +50 -70
- singlestoredb/functions/signature.py +816 -144
- singlestoredb/functions/typing.py +41 -0
- singlestoredb/functions/utils.py +342 -0
- singlestoredb/http/connection.py +3 -1
- singlestoredb/management/manager.py +6 -1
- singlestoredb/management/utils.py +2 -2
- singlestoredb/mysql/connection.py +17 -11
- singlestoredb/tests/ext_funcs/__init__.py +476 -237
- singlestoredb/tests/test_basics.py +2 -0
- singlestoredb/tests/test_ext_func.py +192 -3
- singlestoredb/tests/test_udf.py +101 -131
- singlestoredb/tests/test_udf_returns.py +459 -0
- {singlestoredb-1.12.3.dist-info → singlestoredb-1.13.0.dist-info}/METADATA +2 -1
- {singlestoredb-1.12.3.dist-info → singlestoredb-1.13.0.dist-info}/RECORD +32 -28
- {singlestoredb-1.12.3.dist-info → singlestoredb-1.13.0.dist-info}/LICENSE +0 -0
- {singlestoredb-1.12.3.dist-info → singlestoredb-1.13.0.dist-info}/WHEEL +0 -0
- {singlestoredb-1.12.3.dist-info → singlestoredb-1.13.0.dist-info}/entry_points.txt +0 -0
- {singlestoredb-1.12.3.dist-info → singlestoredb-1.13.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
from typing import Iterable
|
|
3
|
+
from typing import Tuple
|
|
4
|
+
from typing import TypeVar
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
from typing import TypeVarTuple # type: ignore
|
|
8
|
+
from typing import Unpack # type: ignore
|
|
9
|
+
except ImportError:
|
|
10
|
+
# Python 3.10 and earlier do not have TypeVarTuple / Unpack in typing
|
|
11
|
+
from typing_extensions import TypeVarTuple # type: ignore
|
|
12
|
+
from typing_extensions import Unpack # type: ignore
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
T = TypeVar('T', bound=Iterable[Any]) # Generic type for iterable types
|
|
16
|
+
|
|
17
|
+
#
|
|
18
|
+
# Masked types are used for pairs of vectors where the first element is the
|
|
19
|
+
# vector and the second element is a boolean mask indicating which elements
|
|
20
|
+
# are NULL. The boolean mask is a vector of the same length as the first
|
|
21
|
+
# element, where True indicates that the corresponding element in the first
|
|
22
|
+
# element is NULL.
|
|
23
|
+
#
|
|
24
|
+
# This is needed for vector types that do not support NULL values, such as
|
|
25
|
+
# numpy arrays and pandas Series.
|
|
26
|
+
#
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Masked(Tuple[T, T]):
    """
    Two-element tuple pairing a vector with its NULL mask.

    The first element is the data vector and the second element is a
    boolean mask of the same length, where True marks the corresponding
    data element as NULL.

    """

    def __new__(cls, *args: T) -> 'Masked[Tuple[T, T]]':  # type: ignore
        # Only the first two positional arguments are stored; an
        # IndexError is raised if fewer than two are supplied.
        data = args[0]
        mask = args[1]
        return tuple.__new__(cls, (data, mask))  # type: ignore
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
Ts = TypeVarTuple('Ts')
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class Table(Tuple[Unpack[Ts]]):
    """
    Return type for a table valued function.

    The positional arguments given to the constructor become the
    elements of the tuple, in the order they were passed.

    """

    def __new__(cls, *args: Unpack[Ts]) -> 'Table[Tuple[Unpack[Ts]]]':  # type: ignore
        columns = args
        return tuple.__new__(cls, columns)  # type: ignore
|
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import inspect
|
|
3
|
+
import struct
|
|
4
|
+
import sys
|
|
5
|
+
import types
|
|
6
|
+
import typing
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import Any
|
|
9
|
+
from typing import Dict
|
|
10
|
+
from typing import Iterable
|
|
11
|
+
|
|
12
|
+
from .typing import Masked
|
|
13
|
+
|
|
14
|
+
if sys.version_info >= (3, 10):
|
|
15
|
+
_UNION_TYPES = {typing.Union, types.UnionType}
|
|
16
|
+
else:
|
|
17
|
+
_UNION_TYPES = {typing.Union}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
is_dataclass = dataclasses.is_dataclass
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def is_masked(obj: Any) -> bool:
    """
    Check if an object is a parameterized Masked type.

    Parameters
    ----------
    obj : Any
        The annotation / object to inspect.

    Returns
    -------
    bool
        True if ``typing.get_origin(obj)`` is ``Masked`` or a subclass
        of it; False otherwise (including when there is no origin).

    """
    origin = typing.get_origin(obj)
    if origin is None:
        # Not a parameterized generic (this includes the bare Masked class)
        return False
    if origin is Masked:
        return True
    return inspect.isclass(origin) and issubclass(origin, Masked)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def is_union(x: Any) -> bool:
    """
    Check if the object is a Union.

    The origin of ``x`` is compared against the module-level set of
    union origins (``_UNION_TYPES``).

    """
    origin = typing.get_origin(x)
    return origin in _UNION_TYPES
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_annotations(obj: Any) -> Dict[str, Any]:
    """
    Get the annotations of an object.

    Thin wrapper around ``typing.get_type_hints``.

    Parameters
    ----------
    obj : Any
        Object whose type hints are requested.

    Returns
    -------
    Dict[str, Any]
        Mapping of names to type hints.

    """
    hints = typing.get_type_hints(obj)
    return hints
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_module(obj: Any) -> str:
    """
    Get the top-level module name of an object.

    Parameters
    ----------
    obj : Any
        Object whose ``__module__`` attribute is inspected.

    Returns
    -------
    str
        The first dotted component of ``obj.__module__``, or an empty
        string if the attribute is missing or is not a string.

    """
    module = getattr(obj, '__module__', None)
    # ``__module__`` can legitimately be None (or any other non-string);
    # treat that the same as a missing attribute instead of raising.
    if not isinstance(module, str):
        return ''
    return module.partition('.')[0]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get_type_name(obj: Any) -> str:
    """
    Get the type name of an object.

    Returns
    -------
    str
        ``obj.__name__`` when the object has that attribute, otherwise
        the name of the object's class, otherwise an empty string.

    """
    if not hasattr(obj, '__name__'):
        if not hasattr(obj, '__class__'):
            return ''
        return obj.__class__.__name__
    return obj.__name__
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def is_numpy(obj: Any) -> bool:
    """
    Check if an object is a numpy array.

    Detection is by module / type name, so numpy itself is never
    imported here. The checks run in this order: the string form of the
    object, the object as a class, the origin of the object, and finally
    the type of the object.

    """
    # String form of a parameterized annotation, e.g. ``numpy.ndarray[...]``
    if str(obj).startswith('numpy.ndarray['):
        return True

    # A class that lives in numpy is decided on its name alone
    if inspect.isclass(obj) and get_module(obj) == 'numpy':
        return get_type_name(obj) == 'ndarray'

    # Parameterized generic whose origin is numpy's ndarray
    origin = typing.get_origin(obj)
    if get_module(origin) == 'numpy' and get_type_name(origin) == 'ndarray':
        return True

    # Otherwise, look at the type of the value itself
    value_type = type(obj)
    return (
        get_module(value_type) == 'numpy' and
        get_type_name(value_type) == 'ndarray'
    )
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def is_dataframe(obj: Any) -> bool:
    """
    Check if an object is a DataFrame.

    Recognized combinations are a pandas ``DataFrame``, a polars
    ``DataFrame`` and a pyarrow ``Table``.

    """
    # Cheating here a bit so we don't have to import pandas / polars / pyarrow
    # unless we absolutely need to
    frame_names = {
        'pandas': 'DataFrame',
        'polars': 'DataFrame',
        'pyarrow': 'Table',
    }
    expected = frame_names.get(get_module(obj))
    if expected is None:
        return False
    return get_type_name(obj) == expected
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def is_vector(obj: Any, include_masks: bool = False) -> bool:
    """
    Check if an object is a vector type.

    A vector is a pandas Series, polars Series, pyarrow Array, numpy
    array or Masked type.

    """
    # NOTE(review): ``include_masks`` is accepted but never consulted;
    # Masked types are always counted as vectors.
    checks = (
        is_pandas_series,
        is_polars_series,
        is_pyarrow_array,
        is_numpy,
        is_masked,
    )
    return any(check(obj) for check in checks)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def get_data_format(obj: Any) -> str:
    """
    Return the data format of the DataFrame / Table / vector.

    Returns
    -------
    str
        One of 'pandas', 'polars', 'arrow', 'numpy', 'list' or 'scalar'.

    """
    # Cheating here a bit so we don't have to import pandas / polars / pyarrow
    # unless we absolutely need to
    formats = {
        'pandas': 'pandas',
        'polars': 'polars',
        'pyarrow': 'arrow',
        'numpy': 'numpy',
    }
    by_module = formats.get(get_module(obj))
    if by_module is not None:
        return by_module
    return 'list' if isinstance(obj, list) else 'scalar'
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def is_pandas_series(obj: Any) -> bool:
    """
    Check if an object is a pandas Series.

    For a Union, only the first member of the Union is examined.

    """
    target = typing.get_args(obj)[0] if is_union(obj) else obj
    if get_module(target) != 'pandas':
        return False
    return get_type_name(target) == 'Series'
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def is_polars_series(obj: Any) -> bool:
    """
    Check if an object is a polars Series.

    For a Union, only the first member of the Union is examined.

    """
    target = typing.get_args(obj)[0] if is_union(obj) else obj
    if get_module(target) != 'polars':
        return False
    return get_type_name(target) == 'Series'
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def is_pyarrow_array(obj: Any) -> bool:
    """
    Check if an object is a pyarrow Array.

    For a Union, only the first member of the Union is examined.

    """
    target = typing.get_args(obj)[0] if is_union(obj) else obj
    if get_module(target) != 'pyarrow':
        return False
    return get_type_name(target) == 'Array'
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def is_typeddict(obj: Any) -> bool:
    """
    Check if an object is a TypedDict.

    Delegates to ``typing.is_typeddict`` when the running Python
    provides it; otherwise always returns False.

    """
    checker = getattr(typing, 'is_typeddict', None)
    if checker is None:
        return False
    return checker(obj)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def is_namedtuple(obj: Any) -> bool:
    """
    Check if an object is a named tuple.

    Both named tuple classes and named tuple instances are accepted.
    The test is structural: a tuple (sub)class or instance that exposes
    ``_asdict`` and ``_fields``.

    """
    if inspect.isclass(obj):
        tuple_like = issubclass(obj, tuple)
    else:
        tuple_like = isinstance(obj, tuple)
    return (
        tuple_like and
        hasattr(obj, '_asdict') and
        hasattr(obj, '_fields')
    )
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def is_pydantic(obj: Any) -> bool:
    """
    Check if an object is a pydantic model.

    Only classes can match; any non-class object yields False.

    """
    if not inspect.isclass(obj):
        return False
    # We don't want to import pydantic here, so we walk the MRO looking
    # for a class named BaseModel that comes from the pydantic package
    for base in inspect.getmro(obj):
        if get_module(base) == 'pydantic' and get_type_name(base) == 'BaseModel':
            return True
    return False
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
class VectorTypes(str, Enum):
    """Enum for vector types."""

    # NOTE(review): F16 is declared here, but pack_vector / unpack_vector
    # below reject it as an unsupported element type.
    F16 = 'f16'
    F32 = 'f32'
    F64 = 'f64'
    I8 = 'i8'
    I16 = 'i16'
    I32 = 'i32'
    I64 = 'i64'


# (element type code, struct format character, numpy dtype string).
# The numpy dtypes are explicitly little-endian so that they agree with
# the '<' byte order used for the struct formats.
_VECTOR_CODES = (
    ('f32', 'f', '<f4'),
    ('f64', 'd', '<f8'),
    ('i8', 'b', '<i1'),
    ('i16', 'h', '<i2'),
    ('i32', 'i', '<i4'),
    ('i64', 'q', '<i8'),
)


def _vector_codes(element_type: Any) -> typing.Tuple[str, str]:
    """Return the (struct format, numpy dtype) pair for an element type."""
    for code, fmt, np_type in _VECTOR_CODES:
        # Equality (not hashing) so plain strings and VectorTypes both match
        if element_type == code:
            return fmt, np_type
    raise ValueError(f'unsupported element type: {element_type}')


def unpack_vector(
    obj: Any,
    element_type: VectorTypes = VectorTypes.F32,
) -> Iterable[Any]:
    """
    Unpack a vector from bytes.

    Parameters
    ----------
    obj : Any
        The object to unpack. A ``bytes`` / ``bytearray`` value is
        treated as one packed vector. A list, tuple, numpy array,
        pandas Series, polars Series or pyarrow Array is treated as a
        collection of packed vectors.
    element_type : VectorTypes
        The type of the elements in the vector.
        Can be one of 'f32', 'f64', 'i8', 'i16', 'i32', or 'i64'.
        Default is 'f32'.

    Returns
    -------
    Iterable[Any]
        The unpacked vector. For ``bytes`` / ``bytearray`` this is a
        tuple of numbers; for a list or tuple it is a tuple of such
        tuples; for the other containers it is a container of the same
        kind holding numpy arrays.

    Raises
    ------
    ValueError
        If the element type or the object type is not supported.

    """
    fmt, np_type = _vector_codes(element_type)
    item_size = struct.calcsize(f'<{fmt}')

    if isinstance(obj, (bytes, bytearray)):
        return struct.unpack(f'<{len(obj) // item_size}{fmt}', obj)

    if isinstance(obj, (list, tuple)):
        # The element count must come from each packed item, not from
        # the number of items in the outer sequence.
        return tuple(
            struct.unpack(f'<{len(x) // item_size}{fmt}', x) for x in obj
        )

    if is_numpy(obj):
        import numpy as np
        return np.array([np.frombuffer(x, dtype=np_type) for x in obj])

    if is_pandas_series(obj):
        import numpy as np
        import pandas as pd
        return pd.Series([np.frombuffer(x, dtype=np_type) for x in obj])

    if is_polars_series(obj):
        import numpy as np
        import polars as pl
        return pl.Series([np.frombuffer(x, dtype=np_type) for x in obj])

    if is_pyarrow_array(obj):
        import numpy as np
        import pyarrow as pa
        return pa.array([np.frombuffer(x, dtype=np_type) for x in obj])

    raise ValueError(
        f'unsupported object type: {type(obj)}',
    )


def pack_vector(
    obj: Any,
    element_type: VectorTypes = VectorTypes.F32,
) -> bytes:
    """
    Pack a vector into bytes.

    Parameters
    ----------
    obj : Any
        The object to pack. May be a list or tuple of numbers, a numpy
        array, a pandas Series, a polars Series or a pyarrow Array.
    element_type : VectorTypes
        The type of the elements in the vector.
        Can be one of 'f32', 'f64', 'i8', 'i16', 'i32', or 'i64'.
        Default is 'f32'.

    Returns
    -------
    bytes
        The packed vector.

    Raises
    ------
    ValueError
        If the element type or the object type is not supported.

    """
    fmt, _ = _vector_codes(element_type)

    if isinstance(obj, (list, tuple)):
        return struct.pack(f'<{len(obj)}{fmt}', *obj)

    # NOTE(review): the branches below emit the container's own buffer;
    # ``element_type`` is validated above but is not used to convert the
    # data.
    elif is_numpy(obj):
        return obj.tobytes()

    elif is_pandas_series(obj):
        # TODO: Nested vectors
        import pandas as pd
        return pd.Series(obj).to_numpy().tobytes()

    elif is_polars_series(obj):
        # TODO: Nested vectors
        import polars as pl
        return pl.Series(obj).to_numpy().tobytes()

    elif is_pyarrow_array(obj):
        # TODO: Nested vectors
        import pyarrow as pa
        return pa.array(obj).to_numpy().tobytes()

    raise ValueError(
        f'unsupported object type: {type(obj)}',
    )
|
singlestoredb/http/connection.py
CHANGED
|
@@ -648,7 +648,9 @@ class Cursor(connection.Cursor):
|
|
|
648
648
|
if 'UNSIGNED' in data_type:
|
|
649
649
|
flags = 32
|
|
650
650
|
if data_type.endswith('BLOB') or data_type.endswith('BINARY'):
|
|
651
|
-
converter = functools.partial(
|
|
651
|
+
converter = functools.partial(
|
|
652
|
+
b64decode_converter, converter, # type: ignore
|
|
653
|
+
)
|
|
652
654
|
charset = 63 # BINARY
|
|
653
655
|
if type_code == 0: # DECIMAL
|
|
654
656
|
type_code = types.ColumnType.get_code('NEWDECIMAL')
|
|
@@ -62,6 +62,7 @@ class Manager(object):
|
|
|
62
62
|
)
|
|
63
63
|
if not new_access_token:
|
|
64
64
|
raise ManagementError(msg='No management token was configured.')
|
|
65
|
+
|
|
65
66
|
self._is_jwt = not access_token and new_access_token and is_jwt(new_access_token)
|
|
66
67
|
self._sess = requests.Session()
|
|
67
68
|
self._sess.headers.update({
|
|
@@ -70,10 +71,14 @@ class Manager(object):
|
|
|
70
71
|
'Accept': 'application/json',
|
|
71
72
|
'User-Agent': f'SingleStoreDB-Python/{client_version}',
|
|
72
73
|
})
|
|
74
|
+
|
|
73
75
|
self._base_url = urljoin(
|
|
74
|
-
base_url
|
|
76
|
+
base_url
|
|
77
|
+
or config.get_option('management.base_url')
|
|
78
|
+
or type(self).default_base_url,
|
|
75
79
|
version or type(self).default_version,
|
|
76
80
|
) + '/'
|
|
81
|
+
|
|
77
82
|
self._params: Dict[str, str] = {}
|
|
78
83
|
if organization_id:
|
|
79
84
|
self._params['organizationID'] = organization_id
|
|
@@ -30,7 +30,7 @@ JSONList = List[JSON]
|
|
|
30
30
|
T = TypeVar('T')
|
|
31
31
|
|
|
32
32
|
if sys.version_info < (3, 10):
|
|
33
|
-
PathLike = Union[str, os.PathLike]
|
|
33
|
+
PathLike = Union[str, os.PathLike] # type: ignore
|
|
34
34
|
PathLikeABC = os.PathLike
|
|
35
35
|
else:
|
|
36
36
|
PathLike = Union[str, os.PathLike[str]]
|
|
@@ -73,7 +73,7 @@ def ttl_property(ttl: datetime.timedelta) -> Callable[[Any], Any]:
|
|
|
73
73
|
"""Property with a time-to-live."""
|
|
74
74
|
def wrapper(func: Callable[[Any], Any]) -> Any:
|
|
75
75
|
out = TTLProperty(func, ttl=ttl)
|
|
76
|
-
return functools.wraps(func)(out)
|
|
76
|
+
return functools.wraps(func)(out) # type: ignore
|
|
77
77
|
return wrapper
|
|
78
78
|
|
|
79
79
|
|
|
@@ -989,18 +989,11 @@ class Connection(BaseConnection):
|
|
|
989
989
|
|
|
990
990
|
def set_character_set(self, charset, collation=None):
|
|
991
991
|
"""
|
|
992
|
-
Set
|
|
992
|
+
Set charset (and collation) on the server.
|
|
993
993
|
|
|
994
|
-
Send "SET
|
|
994
|
+
Send "SET NAMES charset [COLLATE collation]" query.
|
|
995
995
|
Update Connection.encoding based on charset.
|
|
996
996
|
|
|
997
|
-
If charset/collation are being set to utf8mb4, the corresponding global
|
|
998
|
-
variables (COLLATION_SERVER and CHARACTER_SET_SERVER) must be also set
|
|
999
|
-
to utf8mb4. This is true by default for SingleStore 8.7+. For previuous
|
|
1000
|
-
versions or non-default setting user must manully run the query
|
|
1001
|
-
`SET global collation_connection = utf8mb4_general_ci`
|
|
1002
|
-
replacing utf8mb4_general_ci with {collation}.
|
|
1003
|
-
|
|
1004
997
|
Parameters
|
|
1005
998
|
----------
|
|
1006
999
|
charset : str
|
|
@@ -1013,9 +1006,9 @@ class Connection(BaseConnection):
|
|
|
1013
1006
|
encoding = charset_by_name(charset).encoding
|
|
1014
1007
|
|
|
1015
1008
|
if collation:
|
|
1016
|
-
query = f'SET
|
|
1009
|
+
query = f'SET NAMES {charset} COLLATE {collation}'
|
|
1017
1010
|
else:
|
|
1018
|
-
query = f'SET
|
|
1011
|
+
query = f'SET NAMES {charset}'
|
|
1019
1012
|
self._execute_command(COMMAND.COM_QUERY, query)
|
|
1020
1013
|
self._read_packet()
|
|
1021
1014
|
self.charset = charset
|
|
@@ -1119,6 +1112,19 @@ class Connection(BaseConnection):
|
|
|
1119
1112
|
self._get_server_information()
|
|
1120
1113
|
self._request_authentication()
|
|
1121
1114
|
|
|
1115
|
+
# Send "SET NAMES" query on init for:
|
|
1116
|
+
# - Ensure charset (and collation) is set on the server.
|
|
1117
|
+
# - collation_id in handshake packet may be ignored.
|
|
1118
|
+
# - If collation is not specified, we don't know what is server's
|
|
1119
|
+
# default collation for the charset. For example, default collation
|
|
1120
|
+
# of utf8mb4 is:
|
|
1121
|
+
# - MySQL 5.7, MariaDB 10.x: utf8mb4_general_ci
|
|
1122
|
+
# - MySQL 8.0: utf8mb4_0900_ai_ci
|
|
1123
|
+
#
|
|
1124
|
+
# Reference:
|
|
1125
|
+
# - https://github.com/PyMySQL/PyMySQL/issues/1092
|
|
1126
|
+
# - https://github.com/wagtail/wagtail/issues/9477
|
|
1127
|
+
# - https://zenn.dev/methane/articles/2023-mysql-collation (Japanese)
|
|
1122
1128
|
self.set_character_set(self.charset, self.collation)
|
|
1123
1129
|
|
|
1124
1130
|
if self.sql_mode is not None:
|