duckdb 1.5.0.dev86__cp314-cp314-macosx_10_15_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of duckdb might be problematic.
- _duckdb-stubs/__init__.pyi +1443 -0
- _duckdb-stubs/_func.pyi +46 -0
- _duckdb-stubs/_sqltypes.pyi +75 -0
- _duckdb.cpython-314-darwin.so +0 -0
- adbc_driver_duckdb/__init__.py +50 -0
- adbc_driver_duckdb/dbapi.py +115 -0
- duckdb/__init__.py +381 -0
- duckdb/_dbapi_type_object.py +231 -0
- duckdb/_version.py +22 -0
- duckdb/bytes_io_wrapper.py +69 -0
- duckdb/experimental/__init__.py +3 -0
- duckdb/experimental/spark/LICENSE +260 -0
- duckdb/experimental/spark/__init__.py +6 -0
- duckdb/experimental/spark/_globals.py +77 -0
- duckdb/experimental/spark/_typing.py +46 -0
- duckdb/experimental/spark/conf.py +46 -0
- duckdb/experimental/spark/context.py +180 -0
- duckdb/experimental/spark/errors/__init__.py +70 -0
- duckdb/experimental/spark/errors/error_classes.py +918 -0
- duckdb/experimental/spark/errors/exceptions/__init__.py +16 -0
- duckdb/experimental/spark/errors/exceptions/base.py +168 -0
- duckdb/experimental/spark/errors/utils.py +111 -0
- duckdb/experimental/spark/exception.py +18 -0
- duckdb/experimental/spark/sql/__init__.py +7 -0
- duckdb/experimental/spark/sql/_typing.py +86 -0
- duckdb/experimental/spark/sql/catalog.py +79 -0
- duckdb/experimental/spark/sql/column.py +361 -0
- duckdb/experimental/spark/sql/conf.py +24 -0
- duckdb/experimental/spark/sql/dataframe.py +1389 -0
- duckdb/experimental/spark/sql/functions.py +6195 -0
- duckdb/experimental/spark/sql/group.py +424 -0
- duckdb/experimental/spark/sql/readwriter.py +435 -0
- duckdb/experimental/spark/sql/session.py +297 -0
- duckdb/experimental/spark/sql/streaming.py +36 -0
- duckdb/experimental/spark/sql/type_utils.py +107 -0
- duckdb/experimental/spark/sql/types.py +1239 -0
- duckdb/experimental/spark/sql/udf.py +37 -0
- duckdb/filesystem.py +33 -0
- duckdb/func/__init__.py +3 -0
- duckdb/functional/__init__.py +13 -0
- duckdb/polars_io.py +284 -0
- duckdb/py.typed +0 -0
- duckdb/query_graph/__main__.py +358 -0
- duckdb/sqltypes/__init__.py +63 -0
- duckdb/typing/__init__.py +71 -0
- duckdb/udf.py +24 -0
- duckdb/value/__init__.py +1 -0
- duckdb/value/constant/__init__.py +270 -0
- duckdb-1.5.0.dev86.dist-info/METADATA +88 -0
- duckdb-1.5.0.dev86.dist-info/RECORD +52 -0
- duckdb-1.5.0.dev86.dist-info/WHEEL +6 -0
- duckdb-1.5.0.dev86.dist-info/licenses/LICENSE +7 -0
duckdb/experimental/spark/sql/types.py
@@ -0,0 +1,1239 @@
# ruff: noqa: D100
# This code is based on code from Apache Spark under the license found in the LICENSE
# file located in the 'spark' folder.

import calendar
import datetime
import math
import re
import time
from builtins import tuple
from collections.abc import Iterator, Mapping
from types import MappingProxyType
from typing import (
    Any,
    ClassVar,
    NoReturn,
    Optional,
    TypeVar,
    Union,
    cast,
    overload,
)

import duckdb
from duckdb.sqltypes import DuckDBPyType

from ..exception import ContributionsAcceptedError

T = TypeVar("T")
U = TypeVar("U")

__all__ = [
    "ArrayType",
    "BinaryType",
    "BitstringType",
    "BooleanType",
    "ByteType",
    "DataType",
    "DateType",
    "DayTimeIntervalType",
    "DecimalType",
    "DoubleType",
    "FloatType",
    "HugeIntegerType",
    "IntegerType",
    "LongType",
    "MapType",
    "NullType",
    "Row",
    "ShortType",
    "StringType",
    "StructField",
    "StructType",
    "TimeNTZType",
    "TimeType",
    "TimestampMilisecondNTZType",
    "TimestampNTZType",
    "TimestampNanosecondNTZType",
    "TimestampSecondNTZType",
    "TimestampType",
    "UUIDType",
    "UnsignedByteType",
    "UnsignedHugeIntegerType",
    "UnsignedIntegerType",
    "UnsignedLongType",
    "UnsignedShortType",
]


class DataType:
    """Base class for data types."""

    def __init__(self, duckdb_type: DuckDBPyType) -> None:  # noqa: D107
        self.duckdb_type = duckdb_type

    def __repr__(self) -> str:  # noqa: D105
        return self.__class__.__name__ + "()"

    def __hash__(self) -> int:  # noqa: D105
        return hash(str(self))

    def __eq__(self, other: object) -> bool:  # noqa: D105
        return isinstance(other, self.__class__) and self.__dict__ == other.__dict__

    def __ne__(self, other: object) -> bool:  # noqa: D105
        return not self.__eq__(other)

    @classmethod
    def typeName(cls) -> str:  # noqa: D102
        return cls.__name__[:-4].lower()

    def simpleString(self) -> str:  # noqa: D102
        return self.typeName()

    def jsonValue(self) -> Union[str, dict[str, Any]]:  # noqa: D102
        raise ContributionsAcceptedError

    def json(self) -> str:  # noqa: D102
        raise ContributionsAcceptedError

    def needConversion(self) -> bool:
        """Does this type need conversion between Python object and internal SQL object.

        This is used to avoid the unnecessary conversion for ArrayType/MapType/StructType.
        """
        return False

    def toInternal(self, obj: Any) -> Any:  # noqa: ANN401
        """Converts a Python object into an internal SQL object."""
        return obj

    def fromInternal(self, obj: Any) -> Any:  # noqa: ANN401
        """Converts an internal SQL object into a native Python object."""
        return obj


# This singleton pattern does not work with pickle, you will get
# another object after pickle and unpickle
class DataTypeSingleton(type):
    """Metaclass for DataType."""

    _instances: ClassVar[dict[type["DataTypeSingleton"], "DataTypeSingleton"]] = {}

    def __call__(cls: type[T]) -> T:  # type: ignore[override]
        if cls not in cls._instances:  # type: ignore[attr-defined]
            cls._instances[cls] = super().__call__()  # type: ignore[misc, attr-defined]
        return cls._instances[cls]  # type: ignore[attr-defined]

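# Illustrative example (added for clarity; not part of the released file):
# because of the DataTypeSingleton metaclass, the concrete types defined
# below behave as singletons, so repeated construction returns the identical
# object and identity checks are safe:
#
#   >>> StringType() is StringType()
#   True
#   >>> BooleanType() is BooleanType()
#   True
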
class NullType(DataType, metaclass=DataTypeSingleton):
    """Null type.

    The data type representing None, used for the types that cannot be inferred.
    """

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("NULL"))

    @classmethod
    def typeName(cls) -> str:  # noqa: D102
        return "void"


class AtomicType(DataType):
    """An internal type used to represent everything that is not
    null, UDTs, arrays, structs, or maps.
    """  # noqa: D205


class NumericType(AtomicType):
    """Numeric data types."""


class IntegralType(NumericType, metaclass=DataTypeSingleton):
    """Integral data types."""


class FractionalType(NumericType):
    """Fractional data types."""


class StringType(AtomicType, metaclass=DataTypeSingleton):
    """String data type."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("VARCHAR"))


class BitstringType(AtomicType, metaclass=DataTypeSingleton):
    """Bitstring data type."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("BIT"))


class UUIDType(AtomicType, metaclass=DataTypeSingleton):
    """UUID data type."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("UUID"))


class BinaryType(AtomicType, metaclass=DataTypeSingleton):
    """Binary (byte array) data type."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("BLOB"))


class BooleanType(AtomicType, metaclass=DataTypeSingleton):
    """Boolean data type."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("BOOLEAN"))


class DateType(AtomicType, metaclass=DataTypeSingleton):
    """Date (datetime.date) data type."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("DATE"))

    EPOCH_ORDINAL = datetime.datetime(1970, 1, 1).toordinal()

    def needConversion(self) -> bool:  # noqa: D102
        return True

    def toInternal(self, d: datetime.date) -> Optional[int]:  # noqa: D102
        if d is not None:
            return d.toordinal() - self.EPOCH_ORDINAL

    def fromInternal(self, v: int) -> Optional[datetime.date]:  # noqa: D102
        if v is not None:
            return datetime.date.fromordinal(v + self.EPOCH_ORDINAL)

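# Illustrative example (added for clarity; not part of the released file):
# DateType encodes dates as the number of days since the Unix epoch, so
# toInternal and fromInternal round-trip:
#
#   >>> DateType().toInternal(datetime.date(1970, 1, 2))
#   1
#   >>> DateType().fromInternal(1)
#   datetime.date(1970, 1, 2)
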
class TimestampType(AtomicType, metaclass=DataTypeSingleton):
    """Timestamp (datetime.datetime) data type."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("TIMESTAMPTZ"))

    @classmethod
    def typeName(cls) -> str:  # noqa: D102
        return "timestamptz"

    def needConversion(self) -> bool:  # noqa: D102
        return True

    def toInternal(self, dt: datetime.datetime) -> Optional[int]:  # noqa: D102
        if dt is not None:
            seconds = calendar.timegm(dt.utctimetuple()) if dt.tzinfo else time.mktime(dt.timetuple())
            return int(seconds) * 1000000 + dt.microsecond

    def fromInternal(self, ts: int) -> Optional[datetime.datetime]:  # noqa: D102
        if ts is not None:
            # using int to avoid precision loss in float
            return datetime.datetime.fromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000)


class TimestampNTZType(AtomicType, metaclass=DataTypeSingleton):
    """Timestamp (datetime.datetime) data type without timezone information with microsecond precision."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("TIMESTAMP"))

    def needConversion(self) -> bool:  # noqa: D102
        return True

    @classmethod
    def typeName(cls) -> str:  # noqa: D102
        return "timestamp"

    def toInternal(self, dt: datetime.datetime) -> Optional[int]:  # noqa: D102
        if dt is not None:
            seconds = calendar.timegm(dt.timetuple())
            return int(seconds) * 1000000 + dt.microsecond

    def fromInternal(self, ts: int) -> Optional[datetime.datetime]:  # noqa: D102
        if ts is not None:
            # using int to avoid precision loss in float
            return datetime.datetime.utcfromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000)

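# Illustrative example (added for clarity; not part of the released file):
# both timestamp types encode values as microseconds since the epoch;
# TimestampNTZType treats the naive datetime as UTC via calendar.timegm:
#
#   >>> TimestampNTZType().toInternal(datetime.datetime(1970, 1, 1, 0, 0, 1, 5))
#   1000005
#   >>> TimestampNTZType().fromInternal(1000005)
#   datetime.datetime(1970, 1, 1, 0, 0, 1, 5)
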
class TimestampSecondNTZType(AtomicType, metaclass=DataTypeSingleton):
    """Timestamp (datetime.datetime) data type without timezone information with second precision."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("TIMESTAMP_S"))

    def needConversion(self) -> bool:  # noqa: D102
        return True

    @classmethod
    def typeName(cls) -> str:  # noqa: D102
        return "timestamp_s"

    def toInternal(self, dt: datetime.datetime) -> int:  # noqa: D102
        raise ContributionsAcceptedError

    def fromInternal(self, ts: int) -> datetime.datetime:  # noqa: D102
        raise ContributionsAcceptedError


class TimestampMilisecondNTZType(AtomicType, metaclass=DataTypeSingleton):
    """Timestamp (datetime.datetime) data type without timezone information with millisecond precision."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("TIMESTAMP_MS"))

    def needConversion(self) -> bool:  # noqa: D102
        return True

    @classmethod
    def typeName(cls) -> str:  # noqa: D102
        return "timestamp_ms"

    def toInternal(self, dt: datetime.datetime) -> int:  # noqa: D102
        raise ContributionsAcceptedError

    def fromInternal(self, ts: int) -> datetime.datetime:  # noqa: D102
        raise ContributionsAcceptedError


class TimestampNanosecondNTZType(AtomicType, metaclass=DataTypeSingleton):
    """Timestamp (datetime.datetime) data type without timezone information with nanosecond precision."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("TIMESTAMP_NS"))

    def needConversion(self) -> bool:  # noqa: D102
        return True

    @classmethod
    def typeName(cls) -> str:  # noqa: D102
        return "timestamp_ns"

    def toInternal(self, dt: datetime.datetime) -> int:  # noqa: D102
        raise ContributionsAcceptedError

    def fromInternal(self, ts: int) -> datetime.datetime:  # noqa: D102
        raise ContributionsAcceptedError


class DecimalType(FractionalType):
    """Decimal (decimal.Decimal) data type.

    The DecimalType must have fixed precision (the maximum total number of digits)
    and scale (the number of digits on the right of the dot). For example, (5, 2)
    can support values in [-999.99, 999.99].

    The precision can be up to 38; the scale must be less than or equal to the precision.

    When creating a DecimalType, the default precision and scale is (10, 0). When inferring
    a schema from decimal.Decimal objects, it will be DecimalType(38, 18).

    Parameters
    ----------
    precision : int, optional
        the maximum (i.e. total) number of digits (default: 10)
    scale : int, optional
        the number of digits on the right side of the dot (default: 0)
    """

    def __init__(self, precision: int = 10, scale: int = 0) -> None:  # noqa: D107
        super().__init__(duckdb.decimal_type(precision, scale))
        self.precision = precision
        self.scale = scale
        self.hasPrecisionInfo = True  # this is a public API

    def simpleString(self) -> str:  # noqa: D102
        return f"decimal({int(self.precision):d},{int(self.scale):d})"

    def __repr__(self) -> str:  # noqa: D105
        return f"DecimalType({int(self.precision):d},{int(self.scale):d})"

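# Illustrative example (added for clarity; not part of the released file):
# precision and scale are forwarded to duckdb.decimal_type and echoed in both
# string forms:
#
#   >>> DecimalType(5, 2).simpleString()
#   'decimal(5,2)'
#   >>> DecimalType()
#   DecimalType(10,0)
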
class DoubleType(FractionalType, metaclass=DataTypeSingleton):
    """Double data type, representing double precision floats."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("DOUBLE"))


class FloatType(FractionalType, metaclass=DataTypeSingleton):
    """Float data type, representing single precision floats."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("FLOAT"))


class ByteType(IntegralType):
    """Byte data type, i.e. a signed integer in a single byte."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("TINYINT"))

    def simpleString(self) -> str:  # noqa: D102
        return "tinyint"


class UnsignedByteType(IntegralType):
    """Unsigned byte data type, i.e. an unsigned integer in a single byte."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("UTINYINT"))

    def simpleString(self) -> str:  # noqa: D102
        return "utinyint"


class ShortType(IntegralType):
    """Short data type, i.e. a signed 16-bit integer."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("SMALLINT"))

    def simpleString(self) -> str:  # noqa: D102
        return "smallint"


class UnsignedShortType(IntegralType):
    """Unsigned short data type, i.e. an unsigned 16-bit integer."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("USMALLINT"))

    def simpleString(self) -> str:  # noqa: D102
        return "usmallint"


class IntegerType(IntegralType):
    """Int data type, i.e. a signed 32-bit integer."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("INTEGER"))

    def simpleString(self) -> str:  # noqa: D102
        return "integer"


class UnsignedIntegerType(IntegralType):
    """Unsigned int data type, i.e. an unsigned 32-bit integer."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("UINTEGER"))

    def simpleString(self) -> str:  # noqa: D102
        return "uinteger"


class LongType(IntegralType):
    """Long data type, i.e. a signed 64-bit integer.

    If the values are beyond the range of [-9223372036854775808, 9223372036854775807],
    please use :class:`DecimalType`.
    """

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("BIGINT"))

    def simpleString(self) -> str:  # noqa: D102
        return "bigint"


class UnsignedLongType(IntegralType):
    """Unsigned long data type, i.e. an unsigned 64-bit integer.

    If the values are beyond the range of [0, 18446744073709551615],
    please use :class:`HugeIntegerType`.
    """

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("UBIGINT"))

    def simpleString(self) -> str:  # noqa: D102
        return "ubigint"


class HugeIntegerType(IntegralType):
    """Huge integer data type, i.e. a signed 128-bit integer.

    If the values are beyond the range of [-170141183460469231731687303715884105728,
    170141183460469231731687303715884105727], please use :class:`DecimalType`.
    """

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("HUGEINT"))

    def simpleString(self) -> str:  # noqa: D102
        return "hugeint"


class UnsignedHugeIntegerType(IntegralType):
    """Unsigned huge integer data type, i.e. an unsigned 128-bit integer.

    If the values are beyond the range of [0, 340282366920938463463374607431768211455],
    please use :class:`DecimalType`.
    """

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("UHUGEINT"))

    def simpleString(self) -> str:  # noqa: D102
        return "uhugeint"


class TimeType(IntegralType):
    """Time (datetime.time) data type."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("TIMETZ"))

    def simpleString(self) -> str:  # noqa: D102
        return "timetz"


class TimeNTZType(IntegralType):
    """Time (datetime.time) data type without timezone information."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("TIME"))

    def simpleString(self) -> str:  # noqa: D102
        return "time"


class DayTimeIntervalType(AtomicType):
    """DayTimeIntervalType (datetime.timedelta)."""

    DAY = 0
    HOUR = 1
    MINUTE = 2
    SECOND = 3

    _fields: Mapping[int, str] = MappingProxyType(
        {
            DAY: "day",
            HOUR: "hour",
            MINUTE: "minute",
            SECOND: "second",
        }
    )

    _inverted_fields: Mapping[str, int] = MappingProxyType(dict(zip(_fields.values(), _fields.keys())))

    def __init__(self, startField: Optional[int] = None, endField: Optional[int] = None) -> None:  # noqa: D107
        super().__init__(DuckDBPyType("INTERVAL"))
        if startField is None and endField is None:
            # Default matched to scala side.
            startField = DayTimeIntervalType.DAY
            endField = DayTimeIntervalType.SECOND
        elif startField is not None and endField is None:
            endField = startField

        fields = DayTimeIntervalType._fields
        if startField not in fields or endField not in fields:
            msg = f"interval {startField} to {endField} is invalid"
            raise RuntimeError(msg)
        self.startField = cast("int", startField)
        self.endField = cast("int", endField)

    def _str_repr(self) -> str:
        fields = DayTimeIntervalType._fields
        start_field_name = fields[self.startField]
        end_field_name = fields[self.endField]
        if start_field_name == end_field_name:
            return f"interval {start_field_name}"
        else:
            return f"interval {start_field_name} to {end_field_name}"

    simpleString = _str_repr

    def __repr__(self) -> str:  # noqa: D105
        return f"{type(self).__name__}({int(self.startField):d}, {int(self.endField):d})"

    def needConversion(self) -> bool:  # noqa: D102
        return True

    def toInternal(self, dt: datetime.timedelta) -> Optional[int]:  # noqa: D102
        if dt is not None:
            return (math.floor(dt.total_seconds()) * 1000000) + dt.microseconds

    def fromInternal(self, micros: int) -> Optional[datetime.timedelta]:  # noqa: D102
        if micros is not None:
            return datetime.timedelta(microseconds=micros)

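# Illustrative example (added for clarity; not part of the released file):
# intervals are stored as whole microseconds, combining the floored seconds
# with the timedelta's microsecond component:
#
#   >>> DayTimeIntervalType().simpleString()
#   'interval day to second'
#   >>> DayTimeIntervalType(DayTimeIntervalType.HOUR).simpleString()
#   'interval hour'
#   >>> DayTimeIntervalType().toInternal(datetime.timedelta(days=1, microseconds=7))
#   86400000007
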
class ArrayType(DataType):
    """Array data type.

    Parameters
    ----------
    elementType : :class:`DataType`
        :class:`DataType` of each element in the array.
    containsNull : bool, optional
        whether the array can contain null (None) values.

    Examples:
    --------
    >>> ArrayType(StringType()) == ArrayType(StringType(), True)
    True
    >>> ArrayType(StringType(), False) == ArrayType(StringType())
    False
    """

    def __init__(self, elementType: DataType, containsNull: bool = True) -> None:  # noqa: D107
        super().__init__(duckdb.list_type(elementType.duckdb_type))
        assert isinstance(elementType, DataType), f"elementType {elementType} should be an instance of {DataType}"
        self.elementType = elementType
        self.containsNull = containsNull

    def simpleString(self) -> str:  # noqa: D102
        return f"array<{self.elementType.simpleString()}>"

    def __repr__(self) -> str:  # noqa: D105
        return f"ArrayType({self.elementType}, {self.containsNull!s})"

    def needConversion(self) -> bool:  # noqa: D102
        return self.elementType.needConversion()

    def toInternal(self, obj: list[Optional[T]]) -> list[Optional[T]]:  # noqa: D102
        if not self.needConversion():
            return obj
        return obj and [self.elementType.toInternal(v) for v in obj]

    def fromInternal(self, obj: list[Optional[T]]) -> list[Optional[T]]:  # noqa: D102
        if not self.needConversion():
            return obj
        return obj and [self.elementType.fromInternal(v) for v in obj]

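# Illustrative example (added for clarity; not part of the released file):
# element conversion is skipped entirely when the element type itself needs
# none, and applied element-wise otherwise:
#
#   >>> ArrayType(StringType()).simpleString()
#   'array<string>'
#   >>> ArrayType(DateType()).toInternal([datetime.date(1970, 1, 3)])
#   [2]
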
class MapType(DataType):
    """Map data type.

    Parameters
    ----------
    keyType : :class:`DataType`
        :class:`DataType` of the keys in the map.
    valueType : :class:`DataType`
        :class:`DataType` of the values in the map.
    valueContainsNull : bool, optional
        indicates whether values can contain null (None) values.

    Notes:
    -----
    Keys in a map data type are not allowed to be null (None).

    Examples:
    --------
    >>> (MapType(StringType(), IntegerType()) == MapType(StringType(), IntegerType(), True))
    True
    >>> (MapType(StringType(), IntegerType(), False) == MapType(StringType(), FloatType()))
    False
    """

    def __init__(self, keyType: DataType, valueType: DataType, valueContainsNull: bool = True) -> None:  # noqa: D107
        super().__init__(duckdb.map_type(keyType.duckdb_type, valueType.duckdb_type))
        assert isinstance(keyType, DataType), f"keyType {keyType} should be an instance of {DataType}"
        assert isinstance(valueType, DataType), f"valueType {valueType} should be an instance of {DataType}"
        self.keyType = keyType
        self.valueType = valueType
        self.valueContainsNull = valueContainsNull

    def simpleString(self) -> str:  # noqa: D102
        return f"map<{self.keyType.simpleString()},{self.valueType.simpleString()}>"

    def __repr__(self) -> str:  # noqa: D105
        return f"MapType({self.keyType}, {self.valueType}, {self.valueContainsNull!s})"

    def needConversion(self) -> bool:  # noqa: D102
        return self.keyType.needConversion() or self.valueType.needConversion()

    def toInternal(self, obj: dict[T, Optional[U]]) -> dict[T, Optional[U]]:  # noqa: D102
        if not self.needConversion():
            return obj
        return obj and {self.keyType.toInternal(k): self.valueType.toInternal(v) for k, v in obj.items()}

    def fromInternal(self, obj: dict[T, Optional[U]]) -> dict[T, Optional[U]]:  # noqa: D102
        if not self.needConversion():
            return obj
        return obj and {self.keyType.fromInternal(k): self.valueType.fromInternal(v) for k, v in obj.items()}

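# Illustrative example (added for clarity; not part of the released file):
# a map needs conversion as soon as either its key or its value type does:
#
#   >>> MapType(StringType(), IntegerType()).simpleString()
#   'map<string,integer>'
#   >>> MapType(StringType(), DateType()).needConversion()
#   True
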
class StructField(DataType):
    """A field in :class:`StructType`.

    Parameters
    ----------
    name : str
        name of the field.
    dataType : :class:`DataType`
        :class:`DataType` of the field.
    nullable : bool, optional
        whether the field can be null (None) or not.
    metadata : dict, optional
        a dict from string to simple type that can be converted to JSON automatically

    Examples:
    --------
    >>> (StructField("f1", StringType(), True) == StructField("f1", StringType(), True))
    True
    >>> (StructField("f1", StringType(), True) == StructField("f2", StringType(), True))
    False
    """

    def __init__(  # noqa: D107
        self,
        name: str,
        dataType: DataType,
        nullable: bool = True,
        metadata: Optional[dict[str, Any]] = None,
    ) -> None:
        super().__init__(dataType.duckdb_type)
        assert isinstance(dataType, DataType), f"dataType {dataType} should be an instance of {DataType}"
        assert isinstance(name, str), f"field name {name} should be a string"
        self.name = name
        self.dataType = dataType
        self.nullable = nullable
        self.metadata = metadata or {}

    def simpleString(self) -> str:  # noqa: D102
        return f"{self.name}:{self.dataType.simpleString()}"

    def __repr__(self) -> str:  # noqa: D105
        return f"StructField('{self.name}', {self.dataType}, {self.nullable!s})"

    def needConversion(self) -> bool:  # noqa: D102
        return self.dataType.needConversion()

    def toInternal(self, obj: T) -> T:  # noqa: D102
        return self.dataType.toInternal(obj)

    def fromInternal(self, obj: T) -> T:  # noqa: D102
        return self.dataType.fromInternal(obj)

    def typeName(self) -> str:  # type: ignore[override] # noqa: D102
        msg = "StructField does not have typeName. Use typeName on its type explicitly instead."
        raise TypeError(msg)


class StructType(DataType):
    r"""Struct type, consisting of a list of :class:`StructField`.

    This is the data type representing a :class:`Row`.

    Iterating a :class:`StructType` will iterate over its :class:`StructField`\\s.
    A contained :class:`StructField` can be accessed by its name or position.

    Examples:
    --------
    >>> struct1 = StructType([StructField("f1", StringType(), True)])
    >>> struct1["f1"]
    StructField('f1', StringType(), True)
    >>> struct1[0]
    StructField('f1', StringType(), True)

    >>> struct1 = StructType([StructField("f1", StringType(), True)])
    >>> struct2 = StructType([StructField("f1", StringType(), True)])
    >>> struct1 == struct2
    True
    >>> struct1 = StructType([StructField("f1", StringType(), True)])
    >>> struct2 = StructType(
    ...     [StructField("f1", StringType(), True), StructField("f2", IntegerType(), False)]
    ... )
    >>> struct1 == struct2
    False
    """

    def _update_internal_duckdb_type(self) -> None:
        self.duckdb_type = duckdb.struct_type(dict(zip(self.names, [x.duckdb_type for x in self.fields])))

    def __init__(self, fields: Optional[list[StructField]] = None) -> None:  # noqa: D107
        if not fields:
            self.fields = []
            self.names = []
        else:
            self.fields = fields
            self.names = [f.name for f in fields]
            assert all(isinstance(f, StructField) for f in fields), "fields should be a list of StructField"
        # Precalculated list of fields that need conversion with fromInternal/toInternal functions
        self._needConversion = [f.needConversion() for f in self]
        self._needSerializeAnyField = any(self._needConversion)
        super().__init__(duckdb.struct_type(dict(zip(self.names, [x.duckdb_type for x in self.fields]))))

    @overload
    def add(
        self,
        field: str,
        data_type: Union[str, DataType],
        nullable: bool = True,
        metadata: Optional[dict[str, Any]] = None,
    ) -> "StructType": ...

    @overload
    def add(self, field: StructField) -> "StructType": ...

    def add(
        self,
        field: Union[str, StructField],
        data_type: Optional[Union[str, DataType]] = None,
        nullable: bool = True,
        metadata: Optional[dict[str, Any]] = None,
    ) -> "StructType":
        r"""Construct a :class:`StructType` by adding new elements to it, to define the schema.

        The method accepts either:

        a) A single parameter which is a :class:`StructField` object.
        b) Between 2 and 4 parameters as (name, data_type, nullable (optional),
           metadata (optional)). The data_type parameter may be either a String or a
           :class:`DataType` object.

        Parameters
        ----------
        field : str or :class:`StructField`
            Either the name of the field or a :class:`StructField` object
        data_type : :class:`DataType`, optional
            If present, the DataType of the :class:`StructField` to create
        nullable : bool, optional
            Whether the field to add should be nullable (default True)
        metadata : dict, optional
            Any additional metadata (default None)

        Returns:
        -------
        :class:`StructType`

        Examples:
        --------
        >>> struct1 = StructType().add("f1", StringType(), True).add("f2", StringType(), True, None)
        >>> struct2 = StructType([StructField("f1", StringType(), True), \\
        ...     StructField("f2", StringType(), True, None)])
        >>> struct1 == struct2
        True
        >>> struct1 = StructType().add(StructField("f1", StringType(), True))
        >>> struct2 = StructType([StructField("f1", StringType(), True)])
        >>> struct1 == struct2
        True
        >>> struct1 = StructType().add("f1", "string", True)
        >>> struct2 = StructType([StructField("f1", StringType(), True)])
        >>> struct1 == struct2
        True
        """  # noqa: D205, D415
        if isinstance(field, StructField):
            self.fields.append(field)
            self.names.append(field.name)
        else:
            if isinstance(field, str) and data_type is None:
                msg = "Must specify DataType if passing name of struct_field to create."
                raise ValueError(msg)
            else:
                data_type_f = data_type
            self.fields.append(StructField(field, data_type_f, nullable, metadata))
            self.names.append(field)
        # Precalculated list of fields that need conversion with fromInternal/toInternal functions
        self._needConversion = [f.needConversion() for f in self]
        self._needSerializeAnyField = any(self._needConversion)
        self._update_internal_duckdb_type()
        return self

    def __iter__(self) -> Iterator[StructField]:
        """Iterate the fields."""
        return iter(self.fields)

    def __len__(self) -> int:
        """Return the number of fields."""
        return len(self.fields)

    def __getitem__(self, key: Union[str, int, slice]) -> Union[StructField, "StructType"]:
        """Access fields by name, position, or slice."""
        if isinstance(key, str):
            for field in self:
                if field.name == key:
                    return field
            msg = f"No StructField named {key}"
            raise KeyError(msg)
        elif isinstance(key, int):
            try:
                return self.fields[key]
            except IndexError:
                msg = "StructType index out of range"
                raise IndexError(msg)  # noqa: B904
        elif isinstance(key, slice):
            return StructType(self.fields[key])
        else:
            msg = "StructType keys should be strings, integers or slices"
            raise TypeError(msg)

    def simpleString(self) -> str:  # noqa: D102
        return "struct<{}>".format(",".join(f.simpleString() for f in self))

    def __repr__(self) -> str:  # noqa: D105
        return "StructType([{}])".format(", ".join(str(field) for field in self))

    def __contains__(self, item: str) -> bool:  # noqa: D105
        return item in self.names

    def extract_types_and_names(self) -> tuple[list[str], list[str]]:  # noqa: D102
        names = []
        types = []
        for f in self.fields:
            types.append(str(f.dataType.duckdb_type))
            names.append(f.name)
        return (types, names)

    def fieldNames(self) -> list[str]:
        """Returns all field names in a list.

        Examples:
        --------
        >>> struct = StructType([StructField("f1", StringType(), True)])
        >>> struct.fieldNames()
        ['f1']
        """
        return list(self.names)

    def needConversion(self) -> bool:  # noqa: D102
        # We need to convert Row()/namedtuple into tuple()
        return True

    def toInternal(self, obj: tuple) -> Optional[tuple]:  # noqa: D102
        if obj is None:
            return None

        if self._needSerializeAnyField:
            # Only call the toInternal function for fields that need conversion
            if isinstance(obj, dict):
                return tuple(
                    f.toInternal(obj.get(n)) if c else obj.get(n)
                    for n, f, c in zip(self.names, self.fields, self._needConversion)
                )
            elif isinstance(obj, (tuple, list)):
                return tuple(f.toInternal(v) if c else v for f, v, c in zip(self.fields, obj, self._needConversion))
            elif hasattr(obj, "__dict__"):
                d = obj.__dict__
                return tuple(
                    f.toInternal(d.get(n)) if c else d.get(n)
                    for n, f, c in zip(self.names, self.fields, self._needConversion)
                )
            else:
                msg = f"Unexpected tuple {obj!r} with StructType"
                raise ValueError(msg)
        else:
            if isinstance(obj, dict):
                return tuple(obj.get(n) for n in self.names)
            elif isinstance(obj, (list, tuple)):
                return tuple(obj)
            elif hasattr(obj, "__dict__"):
                d = obj.__dict__
                return tuple(d.get(n) for n in self.names)
            else:
                msg = f"Unexpected tuple {obj!r} with StructType"
                raise ValueError(msg)

    def fromInternal(self, obj: tuple) -> Optional["Row"]:  # noqa: D102
        if obj is None:
            return None
        if isinstance(obj, Row):
            # it's already converted by the pickler
            return obj

        values: Union[tuple, list]
        if self._needSerializeAnyField:
            # Only call the fromInternal function for fields that need conversion
            values = [f.fromInternal(v) if c else v for f, v, c in zip(self.fields, obj, self._needConversion)]
        else:
            values = obj
        return _create_row(self.names, values)

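# Illustrative example (added for clarity; not part of the released file):
# fromInternal turns a plain tuple back into a Row using the declared field
# names, and toInternal flattens a dict into positional values:
#
#   >>> schema = StructType().add("id", IntegerType()).add("name", StringType())
#   >>> schema.fromInternal((1, "Alice"))
#   Row(id=1, name='Alice')
#   >>> schema.toInternal({"id": 1, "name": "Alice"})
#   (1, 'Alice')
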
class UnionType(DataType):
    """Union type; construction is not yet supported."""

    def __init__(self) -> None:
        raise ContributionsAcceptedError


class UserDefinedType(DataType):
    """User-defined type (UDT).

    .. note:: WARN: Spark Internal Use Only
    """

    def __init__(self) -> None:
        raise ContributionsAcceptedError

    @classmethod
    def typeName(cls) -> str:
        return cls.__name__.lower()

    @classmethod
    def sqlType(cls) -> DataType:
        """Underlying SQL storage type for this UDT."""
        msg = "UDT must implement sqlType()."
        raise NotImplementedError(msg)

    @classmethod
    def module(cls) -> str:
        """The Python module of the UDT."""
        msg = "UDT must implement module()."
        raise NotImplementedError(msg)

    @classmethod
    def scalaUDT(cls) -> str:
        """The class name of the paired Scala UDT (could be '', if there
        is no corresponding one).
        """  # noqa: D205
        return ""

    def needConversion(self) -> bool:
        return True

    @classmethod
    def _cachedSqlType(cls) -> DataType:
        """Cache the sqlType() into the class, because it's heavily used in `toInternal`."""
        if not hasattr(cls, "_cached_sql_type"):
            cls._cached_sql_type = cls.sqlType()  # type: ignore[attr-defined]
        return cls._cached_sql_type  # type: ignore[attr-defined]

    def toInternal(self, obj: Any) -> Any:  # noqa: ANN401
        if obj is not None:
            return self._cachedSqlType().toInternal(self.serialize(obj))

    def fromInternal(self, obj: Any) -> Any:  # noqa: ANN401
        v = self._cachedSqlType().fromInternal(obj)
        if v is not None:
            return self.deserialize(v)

    def serialize(self, obj: Any) -> NoReturn:  # noqa: ANN401
        """Converts a user-type object into a SQL datum."""
        msg = "UDT must implement toInternal()."
        raise NotImplementedError(msg)

    def deserialize(self, datum: Any) -> NoReturn:  # noqa: ANN401
        """Converts a SQL datum into a user-type object."""
        msg = "UDT must implement fromInternal()."
        raise NotImplementedError(msg)

    def simpleString(self) -> str:
        return "udt"

    def __eq__(self, other: object) -> bool:
        return type(self) is type(other)


_atomic_types: list[type[DataType]] = [
    StringType,
    BinaryType,
    BooleanType,
    DecimalType,
    FloatType,
    DoubleType,
    ByteType,
    ShortType,
    IntegerType,
    LongType,
    DateType,
    TimestampType,
    TimestampNTZType,
    NullType,
]
_all_atomic_types: dict[str, type[DataType]] = {t.typeName(): t for t in _atomic_types}

_complex_types: list[type[Union[ArrayType, MapType, StructType]]] = [
    ArrayType,
    MapType,
    StructType,
]
_all_complex_types: dict[str, type[Union[ArrayType, MapType, StructType]]] = {v.typeName(): v for v in _complex_types}


_FIXED_DECIMAL = re.compile(r"decimal\(\s*(\d+)\s*,\s*(-?\d+)\s*\)")
_INTERVAL_DAYTIME = re.compile(r"interval (day|hour|minute|second)( to (day|hour|minute|second))?")

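# Illustrative example (added for clarity; not part of the released file):
# the lookup tables map simple type names to classes, and _FIXED_DECIMAL
# recovers precision and scale from a decimal type string:
#
#   >>> _all_atomic_types["string"] is StringType
#   True
#   >>> _FIXED_DECIMAL.match("decimal(10,2)").groups()
#   ('10', '2')
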
def _create_row(fields: Union["Row", list[str]], values: Union[tuple[Any, ...], list[Any]]) -> "Row":
    row = Row(*values)
    row.__fields__ = fields
    return row


class Row(tuple):
    """A row in :class:`DataFrame`.
    The fields in it can be accessed:

    * like attributes (``row.key``)
    * like dictionary values (``row[key]``)

    ``key in row`` will search through row keys.

    Row can be used to create a row object by using named arguments.
    It is not allowed to omit a named argument to represent that the value is
    None or missing. This should be explicitly set to None in this case.

    .. versionchanged:: 3.0.0
        Rows created from named arguments no longer have
        field names sorted alphabetically and will be ordered in the position as
        entered.

    Examples:
    --------
    >>> row = Row(name="Alice", age=11)
    >>> row
    Row(name='Alice', age=11)
    >>> row["name"], row["age"]
    ('Alice', 11)
    >>> row.name, row.age
    ('Alice', 11)
    >>> "name" in row
    True
    >>> "wrong_key" in row
    False

    Row can also be used to create another Row-like class, which can then
    be used to create Row objects, such as

    >>> Person = Row("name", "age")
    >>> Person
    <Row('name', 'age')>
    >>> "name" in Person
    True
    >>> "wrong_key" in Person
    False
    >>> Person("Alice", 11)
    Row(name='Alice', age=11)

    This form can also be used to create rows as tuple values, i.e. with unnamed
    fields.

    >>> row1 = Row("Alice", 11)
    >>> row2 = Row(name="Alice", age=11)
    >>> row1 == row2
    True
    """  # noqa: D205, D415

    @overload
    def __new__(cls, *args: str) -> "Row": ...

    @overload
    def __new__(cls, **kwargs: Any) -> "Row": ...  # noqa: ANN401

    def __new__(cls, *args: Optional[str], **kwargs: Optional[Any]) -> "Row":  # noqa: D102
        if args and kwargs:
            msg = "Can not use both args and kwargs to create Row"
            raise ValueError(msg)
        if kwargs:
            # create row objects
            row = tuple.__new__(cls, list(kwargs.values()))
            row.__fields__ = list(kwargs.keys())
            return row
        else:
            # create row class or objects
            return tuple.__new__(cls, args)

    def asDict(self, recursive: bool = False) -> dict[str, Any]:
        """Return as a dict.

        Parameters
        ----------
        recursive : bool, optional
            turns the nested Rows to dict (default: False).

        Notes:
        -----
        If a row contains duplicate field names, e.g., the rows of a join
        between two :class:`DataFrame` that both have the fields of same names,
        one of the duplicate fields will be selected by ``asDict``. ``__getitem__``
        will also return one of the duplicate fields, however the returned value
        might be different from ``asDict``.

        Examples:
        --------
        >>> Row(name="Alice", age=11).asDict() == {"name": "Alice", "age": 11}
        True
        >>> row = Row(key=1, value=Row(name="a", age=2))
        >>> row.asDict() == {"key": 1, "value": Row(name="a", age=2)}
        True
        >>> row.asDict(True) == {"key": 1, "value": {"name": "a", "age": 2}}
        True
        """
        if not hasattr(self, "__fields__"):
            msg = "Cannot convert a Row class into dict"
            raise TypeError(msg)

        if recursive:

            def conv(obj: Union[Row, list, dict, object]) -> Union[list, dict, object]:
                if isinstance(obj, Row):
                    return obj.asDict(True)
                elif isinstance(obj, list):
                    return [conv(o) for o in obj]
                elif isinstance(obj, dict):
                    return {k: conv(v) for k, v in obj.items()}
                else:
                    return obj

            return dict(zip(self.__fields__, (conv(o) for o in self)))
        else:
            return dict(zip(self.__fields__, self))

    def __contains__(self, item: Any) -> bool:  # noqa: D105, ANN401
        if hasattr(self, "__fields__"):
            return item in self.__fields__
        else:
            return super().__contains__(item)

    # let the object act like a class
    def __call__(self, *args: Any) -> "Row":  # noqa: ANN401
        """Create a new Row object."""
        if len(args) > len(self):
            msg = f"Can not create Row with fields {self}, expected {len(self):d} values but got {args}"
            raise ValueError(msg)
        return _create_row(self, args)

    def __getitem__(self, item: Any) -> Any:  # noqa: D105, ANN401
        if isinstance(item, (int, slice)):
            return super().__getitem__(item)
        try:
            # it will be slow when it has many fields,
            # but this will not be used in normal cases
            idx = self.__fields__.index(item)
            return super().__getitem__(idx)
        except IndexError:
            raise KeyError(item)  # noqa: B904
        except ValueError:
            raise ValueError(item)  # noqa: B904

    def __getattr__(self, item: str) -> Any:  # noqa: D105, ANN401
        if item.startswith("__"):
            raise AttributeError(item)
        try:
            # it will be slow when it has many fields,
            # but this will not be used in normal cases
            idx = self.__fields__.index(item)
            return self[idx]
        except IndexError:
            raise AttributeError(item)  # noqa: B904
        except ValueError:
            raise AttributeError(item)  # noqa: B904

    def __setattr__(self, key: Any, value: Any) -> None:  # noqa: D105, ANN401
        if key != "__fields__":
            msg = "Row is read-only"
            raise RuntimeError(msg)
        self.__dict__[key] = value

    def __reduce__(
        self,
    ) -> Union[str, tuple[Any, ...]]:
        """Returns a tuple so Python knows how to pickle Row."""
        if hasattr(self, "__fields__"):
            return (_create_row, (self.__fields__, tuple(self)))
        else:
            return tuple.__reduce__(self)

    def __repr__(self) -> str:
        """Printable representation of Row used in Python REPL."""
        if hasattr(self, "__fields__"):
            return "Row({})".format(", ".join(f"{k}={v!r}" for k, v in zip(self.__fields__, tuple(self))))
        else:
            return "<Row({})>".format(", ".join(f"{field!r}" for field in self))