duckdb 0.8.2.dev3007__cp311-cp311-win_amd64.whl → 1.4.3.dev8__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _duckdb-stubs/__init__.pyi +1478 -0
- _duckdb-stubs/_func.pyi +46 -0
- _duckdb-stubs/_sqltypes.pyi +75 -0
- duckdb/duckdb.cp311-win_amd64.pyd → _duckdb.cp311-win_amd64.pyd +0 -0
- adbc_driver_duckdb/__init__.py +10 -8
- adbc_driver_duckdb/dbapi.py +4 -5
- duckdb/__init__.py +250 -196
- duckdb/_dbapi_type_object.py +231 -0
- duckdb/_version.py +22 -0
- {pyduckdb → duckdb}/bytes_io_wrapper.py +12 -8
- duckdb/experimental/__init__.py +5 -0
- duckdb/experimental/spark/__init__.py +6 -0
- {pyduckdb → duckdb/experimental}/spark/_globals.py +8 -8
- duckdb/experimental/spark/_typing.py +46 -0
- duckdb/experimental/spark/conf.py +46 -0
- duckdb/experimental/spark/context.py +180 -0
- duckdb/experimental/spark/errors/__init__.py +70 -0
- duckdb/experimental/spark/errors/error_classes.py +918 -0
- duckdb/experimental/spark/errors/exceptions/__init__.py +16 -0
- duckdb/experimental/spark/errors/exceptions/base.py +168 -0
- duckdb/experimental/spark/errors/utils.py +111 -0
- duckdb/experimental/spark/exception.py +18 -0
- {pyduckdb → duckdb/experimental}/spark/sql/__init__.py +5 -5
- duckdb/experimental/spark/sql/_typing.py +86 -0
- duckdb/experimental/spark/sql/catalog.py +79 -0
- duckdb/experimental/spark/sql/column.py +361 -0
- duckdb/experimental/spark/sql/conf.py +24 -0
- duckdb/experimental/spark/sql/dataframe.py +1389 -0
- duckdb/experimental/spark/sql/functions.py +6195 -0
- duckdb/experimental/spark/sql/group.py +424 -0
- duckdb/experimental/spark/sql/readwriter.py +435 -0
- duckdb/experimental/spark/sql/session.py +297 -0
- duckdb/experimental/spark/sql/streaming.py +36 -0
- duckdb/experimental/spark/sql/type_utils.py +107 -0
- {pyduckdb → duckdb/experimental}/spark/sql/types.py +323 -342
- duckdb/experimental/spark/sql/udf.py +37 -0
- duckdb/filesystem.py +33 -0
- duckdb/func/__init__.py +3 -0
- duckdb/functional/__init__.py +12 -16
- duckdb/polars_io.py +284 -0
- duckdb/py.typed +0 -0
- duckdb/query_graph/__main__.py +358 -0
- duckdb/sqltypes/__init__.py +63 -0
- duckdb/typing/__init__.py +18 -6
- {pyduckdb → duckdb}/udf.py +10 -5
- duckdb/value/__init__.py +1 -0
- pyduckdb/value/constant.py → duckdb/value/constant/__init__.py +66 -57
- duckdb-1.4.3.dev8.dist-info/METADATA +88 -0
- duckdb-1.4.3.dev8.dist-info/RECORD +52 -0
- {duckdb-0.8.2.dev3007.dist-info → duckdb-1.4.3.dev8.dist-info}/WHEEL +1 -1
- duckdb-1.4.3.dev8.dist-info/licenses/LICENSE +7 -0
- duckdb-0.8.2.dev3007.dist-info/METADATA +0 -20
- duckdb-0.8.2.dev3007.dist-info/RECORD +0 -34
- duckdb-0.8.2.dev3007.dist-info/top_level.txt +0 -4
- duckdb-stubs/__init__.pyi +0 -574
- duckdb-stubs/functional/__init__.pyi +0 -33
- duckdb-stubs/typing/__init__.pyi +0 -35
- pyduckdb/__init__.py +0 -61
- pyduckdb/filesystem.py +0 -64
- pyduckdb/spark/__init__.py +0 -7
- pyduckdb/spark/conf.py +0 -45
- pyduckdb/spark/context.py +0 -162
- pyduckdb/spark/exception.py +0 -9
- pyduckdb/spark/sql/catalog.py +0 -78
- pyduckdb/spark/sql/conf.py +0 -23
- pyduckdb/spark/sql/dataframe.py +0 -75
- pyduckdb/spark/sql/readwriter.py +0 -180
- pyduckdb/spark/sql/session.py +0 -249
- pyduckdb/spark/sql/streaming.py +0 -37
- pyduckdb/spark/sql/type_utils.py +0 -104
- pyduckdb/spark/sql/udf.py +0 -9
- {pyduckdb → duckdb/experimental}/spark/LICENSE +0 -0
|
@@ -1,28 +1,28 @@
|
|
|
1
|
-
#
|
|
1
|
+
# ruff: noqa: D100
|
|
2
|
+
# This code is based on code from Apache Spark under the license found in the LICENSE
|
|
3
|
+
# file located in the 'spark' folder.
|
|
2
4
|
|
|
5
|
+
import calendar
|
|
6
|
+
import datetime
|
|
7
|
+
import math
|
|
8
|
+
import re
|
|
9
|
+
import time
|
|
10
|
+
from builtins import tuple
|
|
11
|
+
from collections.abc import Iterator, Mapping
|
|
12
|
+
from types import MappingProxyType
|
|
3
13
|
from typing import (
|
|
4
|
-
cast,
|
|
5
|
-
overload,
|
|
6
|
-
Dict,
|
|
7
|
-
Optional,
|
|
8
|
-
List,
|
|
9
|
-
Tuple,
|
|
10
14
|
Any,
|
|
11
|
-
Union,
|
|
12
|
-
Type,
|
|
13
|
-
TypeVar,
|
|
14
15
|
ClassVar,
|
|
15
|
-
|
|
16
|
-
|
|
16
|
+
NoReturn,
|
|
17
|
+
Optional,
|
|
18
|
+
TypeVar,
|
|
19
|
+
Union,
|
|
20
|
+
cast,
|
|
21
|
+
overload,
|
|
17
22
|
)
|
|
18
|
-
from builtins import tuple
|
|
19
|
-
import datetime
|
|
20
|
-
import calendar
|
|
21
|
-
import time
|
|
22
|
-
import math
|
|
23
23
|
|
|
24
24
|
import duckdb
|
|
25
|
-
from duckdb.
|
|
25
|
+
from duckdb.sqltypes import DuckDBPyType
|
|
26
26
|
|
|
27
27
|
from ..exception import ContributionsAcceptedError
|
|
28
28
|
|
|
@@ -30,104 +30,100 @@ T = TypeVar("T")
|
|
|
30
30
|
U = TypeVar("U")
|
|
31
31
|
|
|
32
32
|
__all__ = [
|
|
33
|
-
"
|
|
34
|
-
"NullType",
|
|
35
|
-
"StringType",
|
|
33
|
+
"ArrayType",
|
|
36
34
|
"BinaryType",
|
|
37
|
-
"UUIDType",
|
|
38
35
|
"BitstringType",
|
|
39
36
|
"BooleanType",
|
|
37
|
+
"ByteType",
|
|
38
|
+
"DataType",
|
|
40
39
|
"DateType",
|
|
41
|
-
"
|
|
42
|
-
"TimestampNTZType",
|
|
43
|
-
"TimestampNanosecondNTZType",
|
|
44
|
-
"TimestampMilisecondNTZType",
|
|
45
|
-
"TimestampSecondNTZType",
|
|
46
|
-
"TimeType",
|
|
47
|
-
"TimeNTZType",
|
|
40
|
+
"DayTimeIntervalType",
|
|
48
41
|
"DecimalType",
|
|
49
42
|
"DoubleType",
|
|
50
43
|
"FloatType",
|
|
51
|
-
"
|
|
52
|
-
"UnsignedByteType",
|
|
53
|
-
"ShortType",
|
|
54
|
-
"UnsignedShortType",
|
|
44
|
+
"HugeIntegerType",
|
|
55
45
|
"IntegerType",
|
|
56
|
-
"UnsignedIntegerType",
|
|
57
46
|
"LongType",
|
|
58
|
-
"UnsignedLongType",
|
|
59
|
-
"HugeIntegerType",
|
|
60
|
-
"DayTimeIntervalType",
|
|
61
|
-
"Row",
|
|
62
|
-
"ArrayType",
|
|
63
47
|
"MapType",
|
|
48
|
+
"NullType",
|
|
49
|
+
"Row",
|
|
50
|
+
"ShortType",
|
|
51
|
+
"StringType",
|
|
64
52
|
"StructField",
|
|
65
53
|
"StructType",
|
|
54
|
+
"TimeNTZType",
|
|
55
|
+
"TimeType",
|
|
56
|
+
"TimestampMilisecondNTZType",
|
|
57
|
+
"TimestampNTZType",
|
|
58
|
+
"TimestampNanosecondNTZType",
|
|
59
|
+
"TimestampSecondNTZType",
|
|
60
|
+
"TimestampType",
|
|
61
|
+
"UUIDType",
|
|
62
|
+
"UnsignedByteType",
|
|
63
|
+
"UnsignedHugeIntegerType",
|
|
64
|
+
"UnsignedIntegerType",
|
|
65
|
+
"UnsignedLongType",
|
|
66
|
+
"UnsignedShortType",
|
|
66
67
|
]
|
|
67
68
|
|
|
68
69
|
|
|
69
70
|
class DataType:
|
|
70
71
|
"""Base class for data types."""
|
|
71
72
|
|
|
72
|
-
def __init__(self, duckdb_type):
|
|
73
|
+
def __init__(self, duckdb_type: DuckDBPyType) -> None: # noqa: D107
|
|
73
74
|
self.duckdb_type = duckdb_type
|
|
74
75
|
|
|
75
|
-
def __repr__(self) -> str:
|
|
76
|
+
def __repr__(self) -> str: # noqa: D105
|
|
76
77
|
return self.__class__.__name__ + "()"
|
|
77
78
|
|
|
78
|
-
def __hash__(self) -> int:
|
|
79
|
+
def __hash__(self) -> int: # noqa: D105
|
|
79
80
|
return hash(str(self))
|
|
80
81
|
|
|
81
|
-
def __eq__(self, other:
|
|
82
|
+
def __eq__(self, other: object) -> bool: # noqa: D105
|
|
82
83
|
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
|
|
83
84
|
|
|
84
|
-
def __ne__(self, other:
|
|
85
|
+
def __ne__(self, other: object) -> bool: # noqa: D105
|
|
85
86
|
return not self.__eq__(other)
|
|
86
87
|
|
|
87
88
|
@classmethod
|
|
88
|
-
def typeName(cls) -> str:
|
|
89
|
+
def typeName(cls) -> str: # noqa: D102
|
|
89
90
|
return cls.__name__[:-4].lower()
|
|
90
91
|
|
|
91
|
-
def simpleString(self) -> str:
|
|
92
|
+
def simpleString(self) -> str: # noqa: D102
|
|
92
93
|
return self.typeName()
|
|
93
94
|
|
|
94
|
-
def jsonValue(self) -> Union[str,
|
|
95
|
+
def jsonValue(self) -> Union[str, dict[str, Any]]: # noqa: D102
|
|
95
96
|
raise ContributionsAcceptedError
|
|
96
97
|
|
|
97
|
-
def json(self) -> str:
|
|
98
|
+
def json(self) -> str: # noqa: D102
|
|
98
99
|
raise ContributionsAcceptedError
|
|
99
100
|
|
|
100
101
|
def needConversion(self) -> bool:
|
|
101
|
-
"""
|
|
102
|
-
Does this type needs conversion between Python object and internal SQL object.
|
|
102
|
+
"""Does this type needs conversion between Python object and internal SQL object.
|
|
103
103
|
|
|
104
104
|
This is used to avoid the unnecessary conversion for ArrayType/MapType/StructType.
|
|
105
105
|
"""
|
|
106
106
|
return False
|
|
107
107
|
|
|
108
|
-
def toInternal(self, obj: Any) -> Any:
|
|
109
|
-
"""
|
|
110
|
-
Converts a Python object into an internal SQL object.
|
|
111
|
-
"""
|
|
108
|
+
def toInternal(self, obj: Any) -> Any: # noqa: ANN401
|
|
109
|
+
"""Converts a Python object into an internal SQL object."""
|
|
112
110
|
return obj
|
|
113
111
|
|
|
114
|
-
def fromInternal(self, obj: Any) -> Any:
|
|
115
|
-
"""
|
|
116
|
-
Converts an internal SQL object into a native Python object.
|
|
117
|
-
"""
|
|
112
|
+
def fromInternal(self, obj: Any) -> Any: # noqa: ANN401
|
|
113
|
+
"""Converts an internal SQL object into a native Python object."""
|
|
118
114
|
return obj
|
|
119
115
|
|
|
120
116
|
|
|
121
117
|
# This singleton pattern does not work with pickle, you will get
|
|
122
118
|
# another object after pickle and unpickle
|
|
123
119
|
class DataTypeSingleton(type):
|
|
124
|
-
"""Metaclass for DataType"""
|
|
120
|
+
"""Metaclass for DataType."""
|
|
125
121
|
|
|
126
|
-
_instances: ClassVar[
|
|
122
|
+
_instances: ClassVar[dict[type["DataTypeSingleton"], "DataTypeSingleton"]] = {}
|
|
127
123
|
|
|
128
|
-
def __call__(cls:
|
|
124
|
+
def __call__(cls: type[T]) -> T: # type: ignore[override]
|
|
129
125
|
if cls not in cls._instances: # type: ignore[attr-defined]
|
|
130
|
-
cls._instances[cls] = super(
|
|
126
|
+
cls._instances[cls] = super().__call__() # type: ignore[misc, attr-defined]
|
|
131
127
|
return cls._instances[cls] # type: ignore[attr-defined]
|
|
132
128
|
|
|
133
129
|
|
|
@@ -137,17 +133,18 @@ class NullType(DataType, metaclass=DataTypeSingleton):
|
|
|
137
133
|
The data type representing None, used for the types that cannot be inferred.
|
|
138
134
|
"""
|
|
139
135
|
|
|
140
|
-
def __init__(self):
|
|
136
|
+
def __init__(self) -> None: # noqa: D107
|
|
141
137
|
super().__init__(DuckDBPyType("NULL"))
|
|
142
138
|
|
|
143
139
|
@classmethod
|
|
144
|
-
def typeName(cls) -> str:
|
|
140
|
+
def typeName(cls) -> str: # noqa: D102
|
|
145
141
|
return "void"
|
|
146
142
|
|
|
147
143
|
|
|
148
144
|
class AtomicType(DataType):
|
|
149
145
|
"""An internal type used to represent everything that is not
|
|
150
|
-
null, UDTs, arrays, structs, and maps.
|
|
146
|
+
null, UDTs, arrays, structs, and maps.
|
|
147
|
+
""" # noqa: D205
|
|
151
148
|
|
|
152
149
|
|
|
153
150
|
class NumericType(AtomicType):
|
|
@@ -165,54 +162,54 @@ class FractionalType(NumericType):
|
|
|
165
162
|
class StringType(AtomicType, metaclass=DataTypeSingleton):
|
|
166
163
|
"""String data type."""
|
|
167
164
|
|
|
168
|
-
def __init__(self):
|
|
165
|
+
def __init__(self) -> None: # noqa: D107
|
|
169
166
|
super().__init__(DuckDBPyType("VARCHAR"))
|
|
170
167
|
|
|
171
168
|
|
|
172
169
|
class BitstringType(AtomicType, metaclass=DataTypeSingleton):
|
|
173
170
|
"""Bitstring data type."""
|
|
174
171
|
|
|
175
|
-
def __init__(self):
|
|
172
|
+
def __init__(self) -> None: # noqa: D107
|
|
176
173
|
super().__init__(DuckDBPyType("BIT"))
|
|
177
174
|
|
|
178
175
|
|
|
179
176
|
class UUIDType(AtomicType, metaclass=DataTypeSingleton):
|
|
180
177
|
"""UUID data type."""
|
|
181
178
|
|
|
182
|
-
def __init__(self):
|
|
179
|
+
def __init__(self) -> None: # noqa: D107
|
|
183
180
|
super().__init__(DuckDBPyType("UUID"))
|
|
184
181
|
|
|
185
182
|
|
|
186
183
|
class BinaryType(AtomicType, metaclass=DataTypeSingleton):
|
|
187
184
|
"""Binary (byte array) data type."""
|
|
188
185
|
|
|
189
|
-
def __init__(self):
|
|
186
|
+
def __init__(self) -> None: # noqa: D107
|
|
190
187
|
super().__init__(DuckDBPyType("BLOB"))
|
|
191
188
|
|
|
192
189
|
|
|
193
190
|
class BooleanType(AtomicType, metaclass=DataTypeSingleton):
|
|
194
191
|
"""Boolean data type."""
|
|
195
192
|
|
|
196
|
-
def __init__(self):
|
|
193
|
+
def __init__(self) -> None: # noqa: D107
|
|
197
194
|
super().__init__(DuckDBPyType("BOOLEAN"))
|
|
198
195
|
|
|
199
196
|
|
|
200
197
|
class DateType(AtomicType, metaclass=DataTypeSingleton):
|
|
201
198
|
"""Date (datetime.date) data type."""
|
|
202
199
|
|
|
203
|
-
def __init__(self):
|
|
200
|
+
def __init__(self) -> None: # noqa: D107
|
|
204
201
|
super().__init__(DuckDBPyType("DATE"))
|
|
205
202
|
|
|
206
203
|
EPOCH_ORDINAL = datetime.datetime(1970, 1, 1).toordinal()
|
|
207
204
|
|
|
208
|
-
def needConversion(self) -> bool:
|
|
205
|
+
def needConversion(self) -> bool: # noqa: D102
|
|
209
206
|
return True
|
|
210
207
|
|
|
211
|
-
def toInternal(self, d: datetime.date) -> int:
|
|
208
|
+
def toInternal(self, d: datetime.date) -> int: # noqa: D102
|
|
212
209
|
if d is not None:
|
|
213
210
|
return d.toordinal() - self.EPOCH_ORDINAL
|
|
214
211
|
|
|
215
|
-
def fromInternal(self, v: int) -> datetime.date:
|
|
212
|
+
def fromInternal(self, v: int) -> datetime.date: # noqa: D102
|
|
216
213
|
if v is not None:
|
|
217
214
|
return datetime.date.fromordinal(v + self.EPOCH_ORDINAL)
|
|
218
215
|
|
|
@@ -220,22 +217,22 @@ class DateType(AtomicType, metaclass=DataTypeSingleton):
|
|
|
220
217
|
class TimestampType(AtomicType, metaclass=DataTypeSingleton):
|
|
221
218
|
"""Timestamp (datetime.datetime) data type."""
|
|
222
219
|
|
|
223
|
-
def __init__(self):
|
|
220
|
+
def __init__(self) -> None: # noqa: D107
|
|
224
221
|
super().__init__(DuckDBPyType("TIMESTAMPTZ"))
|
|
225
222
|
|
|
226
223
|
@classmethod
|
|
227
|
-
def typeName(cls) -> str:
|
|
224
|
+
def typeName(cls) -> str: # noqa: D102
|
|
228
225
|
return "timestamptz"
|
|
229
226
|
|
|
230
|
-
def needConversion(self) -> bool:
|
|
227
|
+
def needConversion(self) -> bool: # noqa: D102
|
|
231
228
|
return True
|
|
232
229
|
|
|
233
|
-
def toInternal(self, dt: datetime.datetime) -> int:
|
|
230
|
+
def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
|
|
234
231
|
if dt is not None:
|
|
235
232
|
seconds = calendar.timegm(dt.utctimetuple()) if dt.tzinfo else time.mktime(dt.timetuple())
|
|
236
233
|
return int(seconds) * 1000000 + dt.microsecond
|
|
237
234
|
|
|
238
|
-
def fromInternal(self, ts: int) -> datetime.datetime:
|
|
235
|
+
def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
|
|
239
236
|
if ts is not None:
|
|
240
237
|
# using int to avoid precision loss in float
|
|
241
238
|
return datetime.datetime.fromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000)
|
|
@@ -244,22 +241,22 @@ class TimestampType(AtomicType, metaclass=DataTypeSingleton):
|
|
|
244
241
|
class TimestampNTZType(AtomicType, metaclass=DataTypeSingleton):
|
|
245
242
|
"""Timestamp (datetime.datetime) data type without timezone information with microsecond precision."""
|
|
246
243
|
|
|
247
|
-
def __init__(self):
|
|
244
|
+
def __init__(self) -> None: # noqa: D107
|
|
248
245
|
super().__init__(DuckDBPyType("TIMESTAMP"))
|
|
249
246
|
|
|
250
|
-
def needConversion(self) -> bool:
|
|
247
|
+
def needConversion(self) -> bool: # noqa: D102
|
|
251
248
|
return True
|
|
252
249
|
|
|
253
250
|
@classmethod
|
|
254
|
-
def typeName(cls) -> str:
|
|
251
|
+
def typeName(cls) -> str: # noqa: D102
|
|
255
252
|
return "timestamp"
|
|
256
253
|
|
|
257
|
-
def toInternal(self, dt: datetime.datetime) -> int:
|
|
254
|
+
def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
|
|
258
255
|
if dt is not None:
|
|
259
256
|
seconds = calendar.timegm(dt.timetuple())
|
|
260
257
|
return int(seconds) * 1000000 + dt.microsecond
|
|
261
258
|
|
|
262
|
-
def fromInternal(self, ts: int) -> datetime.datetime:
|
|
259
|
+
def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
|
|
263
260
|
if ts is not None:
|
|
264
261
|
# using int to avoid precision loss in float
|
|
265
262
|
return datetime.datetime.utcfromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000)
|
|
@@ -268,60 +265,60 @@ class TimestampNTZType(AtomicType, metaclass=DataTypeSingleton):
|
|
|
268
265
|
class TimestampSecondNTZType(AtomicType, metaclass=DataTypeSingleton):
|
|
269
266
|
"""Timestamp (datetime.datetime) data type without timezone information with second precision."""
|
|
270
267
|
|
|
271
|
-
def __init__(self):
|
|
268
|
+
def __init__(self) -> None: # noqa: D107
|
|
272
269
|
super().__init__(DuckDBPyType("TIMESTAMP_S"))
|
|
273
270
|
|
|
274
|
-
def needConversion(self) -> bool:
|
|
271
|
+
def needConversion(self) -> bool: # noqa: D102
|
|
275
272
|
return True
|
|
276
273
|
|
|
277
274
|
@classmethod
|
|
278
|
-
def typeName(cls) -> str:
|
|
275
|
+
def typeName(cls) -> str: # noqa: D102
|
|
279
276
|
return "timestamp_s"
|
|
280
277
|
|
|
281
|
-
def toInternal(self, dt: datetime.datetime) -> int:
|
|
278
|
+
def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
|
|
282
279
|
raise ContributionsAcceptedError
|
|
283
280
|
|
|
284
|
-
def fromInternal(self, ts: int) -> datetime.datetime:
|
|
281
|
+
def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
|
|
285
282
|
raise ContributionsAcceptedError
|
|
286
283
|
|
|
287
284
|
|
|
288
285
|
class TimestampMilisecondNTZType(AtomicType, metaclass=DataTypeSingleton):
|
|
289
286
|
"""Timestamp (datetime.datetime) data type without timezone information with milisecond precision."""
|
|
290
287
|
|
|
291
|
-
def __init__(self):
|
|
288
|
+
def __init__(self) -> None: # noqa: D107
|
|
292
289
|
super().__init__(DuckDBPyType("TIMESTAMP_MS"))
|
|
293
290
|
|
|
294
|
-
def needConversion(self) -> bool:
|
|
291
|
+
def needConversion(self) -> bool: # noqa: D102
|
|
295
292
|
return True
|
|
296
293
|
|
|
297
294
|
@classmethod
|
|
298
|
-
def typeName(cls) -> str:
|
|
295
|
+
def typeName(cls) -> str: # noqa: D102
|
|
299
296
|
return "timestamp_ms"
|
|
300
297
|
|
|
301
|
-
def toInternal(self, dt: datetime.datetime) -> int:
|
|
298
|
+
def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
|
|
302
299
|
raise ContributionsAcceptedError
|
|
303
300
|
|
|
304
|
-
def fromInternal(self, ts: int) -> datetime.datetime:
|
|
301
|
+
def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
|
|
305
302
|
raise ContributionsAcceptedError
|
|
306
303
|
|
|
307
304
|
|
|
308
305
|
class TimestampNanosecondNTZType(AtomicType, metaclass=DataTypeSingleton):
|
|
309
306
|
"""Timestamp (datetime.datetime) data type without timezone information with nanosecond precision."""
|
|
310
307
|
|
|
311
|
-
def __init__(self):
|
|
308
|
+
def __init__(self) -> None: # noqa: D107
|
|
312
309
|
super().__init__(DuckDBPyType("TIMESTAMP_NS"))
|
|
313
310
|
|
|
314
|
-
def needConversion(self) -> bool:
|
|
311
|
+
def needConversion(self) -> bool: # noqa: D102
|
|
315
312
|
return True
|
|
316
313
|
|
|
317
314
|
@classmethod
|
|
318
|
-
def typeName(cls) -> str:
|
|
315
|
+
def typeName(cls) -> str: # noqa: D102
|
|
319
316
|
return "timestamp_ns"
|
|
320
317
|
|
|
321
|
-
def toInternal(self, dt: datetime.datetime) -> int:
|
|
318
|
+
def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
|
|
322
319
|
raise ContributionsAcceptedError
|
|
323
320
|
|
|
324
|
-
def fromInternal(self, ts: int) -> datetime.datetime:
|
|
321
|
+
def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
|
|
325
322
|
raise ContributionsAcceptedError
|
|
326
323
|
|
|
327
324
|
|
|
@@ -345,90 +342,90 @@ class DecimalType(FractionalType):
|
|
|
345
342
|
the number of digits on right side of dot. (default: 0)
|
|
346
343
|
"""
|
|
347
344
|
|
|
348
|
-
def __init__(self, precision: int = 10, scale: int = 0):
|
|
345
|
+
def __init__(self, precision: int = 10, scale: int = 0) -> None: # noqa: D107
|
|
349
346
|
super().__init__(duckdb.decimal_type(precision, scale))
|
|
350
347
|
self.precision = precision
|
|
351
348
|
self.scale = scale
|
|
352
349
|
self.hasPrecisionInfo = True # this is a public API
|
|
353
350
|
|
|
354
|
-
def simpleString(self) -> str:
|
|
355
|
-
return "decimal(
|
|
351
|
+
def simpleString(self) -> str: # noqa: D102
|
|
352
|
+
return f"decimal({int(self.precision):d},{int(self.scale):d})"
|
|
356
353
|
|
|
357
|
-
def __repr__(self) -> str:
|
|
358
|
-
return "DecimalType(
|
|
354
|
+
def __repr__(self) -> str: # noqa: D105
|
|
355
|
+
return f"DecimalType({int(self.precision):d},{int(self.scale):d})"
|
|
359
356
|
|
|
360
357
|
|
|
361
358
|
class DoubleType(FractionalType, metaclass=DataTypeSingleton):
|
|
362
359
|
"""Double data type, representing double precision floats."""
|
|
363
360
|
|
|
364
|
-
def __init__(self):
|
|
361
|
+
def __init__(self) -> None: # noqa: D107
|
|
365
362
|
super().__init__(DuckDBPyType("DOUBLE"))
|
|
366
363
|
|
|
367
364
|
|
|
368
365
|
class FloatType(FractionalType, metaclass=DataTypeSingleton):
|
|
369
366
|
"""Float data type, representing single precision floats."""
|
|
370
367
|
|
|
371
|
-
def __init__(self):
|
|
368
|
+
def __init__(self) -> None: # noqa: D107
|
|
372
369
|
super().__init__(DuckDBPyType("FLOAT"))
|
|
373
370
|
|
|
374
371
|
|
|
375
372
|
class ByteType(IntegralType):
|
|
376
373
|
"""Byte data type, i.e. a signed integer in a single byte."""
|
|
377
374
|
|
|
378
|
-
def __init__(self):
|
|
375
|
+
def __init__(self) -> None: # noqa: D107
|
|
379
376
|
super().__init__(DuckDBPyType("TINYINT"))
|
|
380
377
|
|
|
381
|
-
def simpleString(self) -> str:
|
|
378
|
+
def simpleString(self) -> str: # noqa: D102
|
|
382
379
|
return "tinyint"
|
|
383
380
|
|
|
384
381
|
|
|
385
382
|
class UnsignedByteType(IntegralType):
|
|
386
383
|
"""Unsigned byte data type, i.e. a unsigned integer in a single byte."""
|
|
387
384
|
|
|
388
|
-
def __init__(self):
|
|
385
|
+
def __init__(self) -> None: # noqa: D107
|
|
389
386
|
super().__init__(DuckDBPyType("UTINYINT"))
|
|
390
387
|
|
|
391
|
-
def simpleString(self) -> str:
|
|
388
|
+
def simpleString(self) -> str: # noqa: D102
|
|
392
389
|
return "utinyint"
|
|
393
390
|
|
|
394
391
|
|
|
395
392
|
class ShortType(IntegralType):
|
|
396
393
|
"""Short data type, i.e. a signed 16-bit integer."""
|
|
397
394
|
|
|
398
|
-
def __init__(self):
|
|
395
|
+
def __init__(self) -> None: # noqa: D107
|
|
399
396
|
super().__init__(DuckDBPyType("SMALLINT"))
|
|
400
397
|
|
|
401
|
-
def simpleString(self) -> str:
|
|
398
|
+
def simpleString(self) -> str: # noqa: D102
|
|
402
399
|
return "smallint"
|
|
403
400
|
|
|
404
401
|
|
|
405
402
|
class UnsignedShortType(IntegralType):
|
|
406
403
|
"""Unsigned short data type, i.e. a unsigned 16-bit integer."""
|
|
407
404
|
|
|
408
|
-
def __init__(self):
|
|
405
|
+
def __init__(self) -> None: # noqa: D107
|
|
409
406
|
super().__init__(DuckDBPyType("USMALLINT"))
|
|
410
407
|
|
|
411
|
-
def simpleString(self) -> str:
|
|
408
|
+
def simpleString(self) -> str: # noqa: D102
|
|
412
409
|
return "usmallint"
|
|
413
410
|
|
|
414
411
|
|
|
415
412
|
class IntegerType(IntegralType):
|
|
416
413
|
"""Int data type, i.e. a signed 32-bit integer."""
|
|
417
414
|
|
|
418
|
-
def __init__(self):
|
|
415
|
+
def __init__(self) -> None: # noqa: D107
|
|
419
416
|
super().__init__(DuckDBPyType("INTEGER"))
|
|
420
417
|
|
|
421
|
-
def simpleString(self) -> str:
|
|
418
|
+
def simpleString(self) -> str: # noqa: D102
|
|
422
419
|
return "integer"
|
|
423
420
|
|
|
424
421
|
|
|
425
422
|
class UnsignedIntegerType(IntegralType):
|
|
426
423
|
"""Unsigned int data type, i.e. a unsigned 32-bit integer."""
|
|
427
424
|
|
|
428
|
-
def __init__(self):
|
|
425
|
+
def __init__(self) -> None: # noqa: D107
|
|
429
426
|
super().__init__(DuckDBPyType("UINTEGER"))
|
|
430
427
|
|
|
431
|
-
def simpleString(self) -> str:
|
|
428
|
+
def simpleString(self) -> str: # noqa: D102
|
|
432
429
|
return "uinteger"
|
|
433
430
|
|
|
434
431
|
|
|
@@ -439,10 +436,10 @@ class LongType(IntegralType):
|
|
|
439
436
|
please use :class:`DecimalType`.
|
|
440
437
|
"""
|
|
441
438
|
|
|
442
|
-
def __init__(self):
|
|
439
|
+
def __init__(self) -> None: # noqa: D107
|
|
443
440
|
super().__init__(DuckDBPyType("BIGINT"))
|
|
444
441
|
|
|
445
|
-
def simpleString(self) -> str:
|
|
442
|
+
def simpleString(self) -> str: # noqa: D102
|
|
446
443
|
return "bigint"
|
|
447
444
|
|
|
448
445
|
|
|
@@ -453,44 +450,58 @@ class UnsignedLongType(IntegralType):
|
|
|
453
450
|
please use :class:`HugeIntegerType`.
|
|
454
451
|
"""
|
|
455
452
|
|
|
456
|
-
def __init__(self):
|
|
453
|
+
def __init__(self) -> None: # noqa: D107
|
|
457
454
|
super().__init__(DuckDBPyType("UBIGINT"))
|
|
458
455
|
|
|
459
|
-
def simpleString(self) -> str:
|
|
456
|
+
def simpleString(self) -> str: # noqa: D102
|
|
460
457
|
return "ubigint"
|
|
461
458
|
|
|
462
459
|
|
|
463
460
|
class HugeIntegerType(IntegralType):
|
|
464
461
|
"""Huge integer data type, i.e. a signed 128-bit integer.
|
|
465
462
|
|
|
466
|
-
If the values are beyond the range of [-
|
|
467
|
-
please use :class:`DecimalType`.
|
|
463
|
+
If the values are beyond the range of [-170141183460469231731687303715884105728,
|
|
464
|
+
170141183460469231731687303715884105727], please use :class:`DecimalType`.
|
|
468
465
|
"""
|
|
469
466
|
|
|
470
|
-
def __init__(self):
|
|
467
|
+
def __init__(self) -> None: # noqa: D107
|
|
471
468
|
super().__init__(DuckDBPyType("HUGEINT"))
|
|
472
469
|
|
|
473
|
-
def simpleString(self) -> str:
|
|
470
|
+
def simpleString(self) -> str: # noqa: D102
|
|
474
471
|
return "hugeint"
|
|
475
472
|
|
|
476
473
|
|
|
474
|
+
class UnsignedHugeIntegerType(IntegralType):
|
|
475
|
+
"""Unsigned huge integer data type, i.e. a unsigned 128-bit integer.
|
|
476
|
+
|
|
477
|
+
If the values are beyond the range of [0, 340282366920938463463374607431768211455],
|
|
478
|
+
please use :class:`DecimalType`.
|
|
479
|
+
"""
|
|
480
|
+
|
|
481
|
+
def __init__(self) -> None: # noqa: D107
|
|
482
|
+
super().__init__(DuckDBPyType("UHUGEINT"))
|
|
483
|
+
|
|
484
|
+
def simpleString(self) -> str: # noqa: D102
|
|
485
|
+
return "uhugeint"
|
|
486
|
+
|
|
487
|
+
|
|
477
488
|
class TimeType(IntegralType):
|
|
478
489
|
"""Time (datetime.time) data type."""
|
|
479
490
|
|
|
480
|
-
def __init__(self):
|
|
491
|
+
def __init__(self) -> None: # noqa: D107
|
|
481
492
|
super().__init__(DuckDBPyType("TIMETZ"))
|
|
482
493
|
|
|
483
|
-
def simpleString(self) -> str:
|
|
494
|
+
def simpleString(self) -> str: # noqa: D102
|
|
484
495
|
return "timetz"
|
|
485
496
|
|
|
486
497
|
|
|
487
498
|
class TimeNTZType(IntegralType):
|
|
488
499
|
"""Time (datetime.time) data type without timezone information."""
|
|
489
500
|
|
|
490
|
-
def __init__(self):
|
|
501
|
+
def __init__(self) -> None: # noqa: D107
|
|
491
502
|
super().__init__(DuckDBPyType("TIME"))
|
|
492
503
|
|
|
493
|
-
def simpleString(self) -> str:
|
|
504
|
+
def simpleString(self) -> str: # noqa: D102
|
|
494
505
|
return "time"
|
|
495
506
|
|
|
496
507
|
|
|
@@ -502,16 +513,18 @@ class DayTimeIntervalType(AtomicType):
|
|
|
502
513
|
MINUTE = 2
|
|
503
514
|
SECOND = 3
|
|
504
515
|
|
|
505
|
-
_fields =
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
516
|
+
_fields: Mapping[str, int] = MappingProxyType(
|
|
517
|
+
{
|
|
518
|
+
DAY: "day",
|
|
519
|
+
HOUR: "hour",
|
|
520
|
+
MINUTE: "minute",
|
|
521
|
+
SECOND: "second",
|
|
522
|
+
}
|
|
523
|
+
)
|
|
511
524
|
|
|
512
|
-
_inverted_fields = dict(zip(_fields.values(), _fields.keys()))
|
|
525
|
+
_inverted_fields: Mapping[int, str] = MappingProxyType(dict(zip(_fields.values(), _fields.keys())))
|
|
513
526
|
|
|
514
|
-
def __init__(self, startField: Optional[int] = None, endField: Optional[int] = None):
|
|
527
|
+
def __init__(self, startField: Optional[int] = None, endField: Optional[int] = None) -> None: # noqa: D107
|
|
515
528
|
super().__init__(DuckDBPyType("INTERVAL"))
|
|
516
529
|
if startField is None and endField is None:
|
|
517
530
|
# Default matched to scala side.
|
|
@@ -521,33 +534,34 @@ class DayTimeIntervalType(AtomicType):
|
|
|
521
534
|
endField = startField
|
|
522
535
|
|
|
523
536
|
fields = DayTimeIntervalType._fields
|
|
524
|
-
if startField not in fields
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
self.
|
|
537
|
+
if startField not in fields or endField not in fields:
|
|
538
|
+
msg = f"interval {startField} to {endField} is invalid"
|
|
539
|
+
raise RuntimeError(msg)
|
|
540
|
+
self.startField = cast("int", startField)
|
|
541
|
+
self.endField = cast("int", endField)
|
|
528
542
|
|
|
529
543
|
def _str_repr(self) -> str:
|
|
530
544
|
fields = DayTimeIntervalType._fields
|
|
531
545
|
start_field_name = fields[self.startField]
|
|
532
546
|
end_field_name = fields[self.endField]
|
|
533
547
|
if start_field_name == end_field_name:
|
|
534
|
-
return "interval
|
|
548
|
+
return f"interval {start_field_name}"
|
|
535
549
|
else:
|
|
536
|
-
return "interval
|
|
550
|
+
return f"interval {start_field_name} to {end_field_name}"
|
|
537
551
|
|
|
538
552
|
simpleString = _str_repr
|
|
539
553
|
|
|
540
|
-
def __repr__(self) -> str:
|
|
541
|
-
return "
|
|
554
|
+
def __repr__(self) -> str: # noqa: D105
|
|
555
|
+
return f"{type(self).__name__}({int(self.startField):d}, {int(self.endField):d})"
|
|
542
556
|
|
|
543
|
-
def needConversion(self) -> bool:
|
|
557
|
+
def needConversion(self) -> bool: # noqa: D102
|
|
544
558
|
return True
|
|
545
559
|
|
|
546
|
-
def toInternal(self, dt: datetime.timedelta) -> Optional[int]:
|
|
560
|
+
def toInternal(self, dt: datetime.timedelta) -> Optional[int]: # noqa: D102
|
|
547
561
|
if dt is not None:
|
|
548
562
|
return (math.floor(dt.total_seconds()) * 1000000) + dt.microseconds
|
|
549
563
|
|
|
550
|
-
def fromInternal(self, micros: int) -> Optional[datetime.timedelta]:
|
|
564
|
+
def fromInternal(self, micros: int) -> Optional[datetime.timedelta]: # noqa: D102
|
|
551
565
|
if micros is not None:
|
|
552
566
|
return datetime.timedelta(microseconds=micros)
|
|
553
567
|
|
|
@@ -562,7 +576,7 @@ class ArrayType(DataType):
|
|
|
562
576
|
containsNull : bool, optional
|
|
563
577
|
whether the array can contain null (None) values.
|
|
564
578
|
|
|
565
|
-
Examples
|
|
579
|
+
Examples:
|
|
566
580
|
--------
|
|
567
581
|
>>> ArrayType(StringType()) == ArrayType(StringType(), True)
|
|
568
582
|
True
|
|
@@ -570,30 +584,27 @@ class ArrayType(DataType):
|
|
|
570
584
|
False
|
|
571
585
|
"""
|
|
572
586
|
|
|
573
|
-
def __init__(self, elementType: DataType, containsNull: bool = True):
|
|
587
|
+
def __init__(self, elementType: DataType, containsNull: bool = True) -> None: # noqa: D107
|
|
574
588
|
super().__init__(duckdb.list_type(elementType.duckdb_type))
|
|
575
|
-
assert isinstance(elementType, DataType), "elementType
|
|
576
|
-
elementType,
|
|
577
|
-
DataType,
|
|
578
|
-
)
|
|
589
|
+
assert isinstance(elementType, DataType), f"elementType {elementType} should be an instance of {DataType}"
|
|
579
590
|
self.elementType = elementType
|
|
580
591
|
self.containsNull = containsNull
|
|
581
592
|
|
|
582
|
-
def simpleString(self) -> str:
|
|
583
|
-
return "array
|
|
593
|
+
def simpleString(self) -> str: # noqa: D102
|
|
594
|
+
return f"array<{self.elementType.simpleString()}>"
|
|
584
595
|
|
|
585
|
-
def __repr__(self) -> str:
|
|
586
|
-
return "ArrayType(
|
|
596
|
+
def __repr__(self) -> str: # noqa: D105
|
|
597
|
+
return f"ArrayType({self.elementType}, {self.containsNull!s})"
|
|
587
598
|
|
|
588
|
-
def needConversion(self) -> bool:
|
|
599
|
+
def needConversion(self) -> bool: # noqa: D102
|
|
589
600
|
return self.elementType.needConversion()
|
|
590
601
|
|
|
591
|
-
def toInternal(self, obj:
|
|
602
|
+
def toInternal(self, obj: list[Optional[T]]) -> list[Optional[T]]: # noqa: D102
|
|
592
603
|
if not self.needConversion():
|
|
593
604
|
return obj
|
|
594
605
|
return obj and [self.elementType.toInternal(v) for v in obj]
|
|
595
606
|
|
|
596
|
-
def fromInternal(self, obj:
|
|
607
|
+
def fromInternal(self, obj: list[Optional[T]]) -> list[Optional[T]]: # noqa: D102
|
|
597
608
|
if not self.needConversion():
|
|
598
609
|
return obj
|
|
599
610
|
return obj and [self.elementType.fromInternal(v) for v in obj]
|
|
@@ -611,59 +622,44 @@ class MapType(DataType):
|
|
|
611
622
|
valueContainsNull : bool, optional
|
|
612
623
|
indicates whether values can contain null (None) values.
|
|
613
624
|
|
|
614
|
-
Notes
|
|
625
|
+
Notes:
|
|
615
626
|
-----
|
|
616
627
|
Keys in a map data type are not allowed to be null (None).
|
|
617
628
|
|
|
618
|
-
Examples
|
|
629
|
+
Examples:
|
|
619
630
|
--------
|
|
620
|
-
>>> (MapType(StringType(), IntegerType())
|
|
621
|
-
... == MapType(StringType(), IntegerType(), True))
|
|
631
|
+
>>> (MapType(StringType(), IntegerType()) == MapType(StringType(), IntegerType(), True))
|
|
622
632
|
True
|
|
623
|
-
>>> (MapType(StringType(), IntegerType(), False)
|
|
624
|
-
... == MapType(StringType(), FloatType()))
|
|
633
|
+
>>> (MapType(StringType(), IntegerType(), False) == MapType(StringType(), FloatType()))
|
|
625
634
|
False
|
|
626
635
|
"""
|
|
627
636
|
|
|
628
|
-
def __init__(self, keyType: DataType, valueType: DataType, valueContainsNull: bool = True):
|
|
637
|
+
def __init__(self, keyType: DataType, valueType: DataType, valueContainsNull: bool = True) -> None: # noqa: D107
|
|
629
638
|
super().__init__(duckdb.map_type(keyType.duckdb_type, valueType.duckdb_type))
|
|
630
|
-
assert isinstance(keyType, DataType), "keyType
|
|
631
|
-
|
|
632
|
-
DataType,
|
|
633
|
-
)
|
|
634
|
-
assert isinstance(valueType, DataType), "valueType %s should be an instance of %s" % (
|
|
635
|
-
valueType,
|
|
636
|
-
DataType,
|
|
637
|
-
)
|
|
639
|
+
assert isinstance(keyType, DataType), f"keyType {keyType} should be an instance of {DataType}"
|
|
640
|
+
assert isinstance(valueType, DataType), f"valueType {valueType} should be an instance of {DataType}"
|
|
638
641
|
self.keyType = keyType
|
|
639
642
|
self.valueType = valueType
|
|
640
643
|
self.valueContainsNull = valueContainsNull
|
|
641
644
|
|
|
642
|
-
def simpleString(self) -> str:
|
|
643
|
-
return "map
|
|
644
|
-
self.keyType.simpleString(),
|
|
645
|
-
self.valueType.simpleString(),
|
|
646
|
-
)
|
|
645
|
+
def simpleString(self) -> str: # noqa: D102
|
|
646
|
+
return f"map<{self.keyType.simpleString()},{self.valueType.simpleString()}>"
|
|
647
647
|
|
|
648
|
-
def __repr__(self) -> str:
|
|
649
|
-
return "MapType(
|
|
650
|
-
self.keyType,
|
|
651
|
-
self.valueType,
|
|
652
|
-
str(self.valueContainsNull),
|
|
653
|
-
)
|
|
648
|
+
def __repr__(self) -> str: # noqa: D105
|
|
649
|
+
return f"MapType({self.keyType}, {self.valueType}, {self.valueContainsNull!s})"
|
|
654
650
|
|
|
655
|
-
def needConversion(self) -> bool:
|
|
651
|
+
def needConversion(self) -> bool: # noqa: D102
|
|
656
652
|
return self.keyType.needConversion() or self.valueType.needConversion()
|
|
657
653
|
|
|
658
|
-
def toInternal(self, obj:
|
|
654
|
+
def toInternal(self, obj: dict[T, Optional[U]]) -> dict[T, Optional[U]]: # noqa: D102
|
|
659
655
|
if not self.needConversion():
|
|
660
656
|
return obj
|
|
661
|
-
return obj and
|
|
657
|
+
return obj and {self.keyType.toInternal(k): self.valueType.toInternal(v) for k, v in obj.items()}
|
|
662
658
|
|
|
663
|
-
def fromInternal(self, obj:
|
|
659
|
+
def fromInternal(self, obj: dict[T, Optional[U]]) -> dict[T, Optional[U]]: # noqa: D102
|
|
664
660
|
if not self.needConversion():
|
|
665
661
|
return obj
|
|
666
|
-
return obj and
|
|
662
|
+
return obj and {self.keyType.fromInternal(k): self.valueType.fromInternal(v) for k, v in obj.items()}
|
|
667
663
|
|
|
668
664
|
|
|
669
665
|
class StructField(DataType):
|
|
@@ -680,66 +676,58 @@ class StructField(DataType):
|
|
|
680
676
|
metadata : dict, optional
|
|
681
677
|
a dict from string to simple type that can be toInternald to JSON automatically
|
|
682
678
|
|
|
683
|
-
Examples
|
|
679
|
+
Examples:
|
|
684
680
|
--------
|
|
685
|
-
>>> (StructField("f1", StringType(), True)
|
|
686
|
-
... == StructField("f1", StringType(), True))
|
|
681
|
+
>>> (StructField("f1", StringType(), True) == StructField("f1", StringType(), True))
|
|
687
682
|
True
|
|
688
|
-
>>> (StructField("f1", StringType(), True)
|
|
689
|
-
... == StructField("f2", StringType(), True))
|
|
683
|
+
>>> (StructField("f1", StringType(), True) == StructField("f2", StringType(), True))
|
|
690
684
|
False
|
|
691
685
|
"""
|
|
692
686
|
|
|
693
|
-
def __init__(
|
|
687
|
+
def __init__( # noqa: D107
|
|
694
688
|
self,
|
|
695
689
|
name: str,
|
|
696
690
|
dataType: DataType,
|
|
697
691
|
nullable: bool = True,
|
|
698
|
-
metadata: Optional[
|
|
699
|
-
):
|
|
692
|
+
metadata: Optional[dict[str, Any]] = None,
|
|
693
|
+
) -> None:
|
|
700
694
|
super().__init__(dataType.duckdb_type)
|
|
701
|
-
assert isinstance(dataType, DataType), "dataType
|
|
702
|
-
|
|
703
|
-
DataType,
|
|
704
|
-
)
|
|
705
|
-
assert isinstance(name, str), "field name %s should be a string" % (name)
|
|
695
|
+
assert isinstance(dataType, DataType), f"dataType {dataType} should be an instance of {DataType}"
|
|
696
|
+
assert isinstance(name, str), f"field name {name} should be a string"
|
|
706
697
|
self.name = name
|
|
707
698
|
self.dataType = dataType
|
|
708
699
|
self.nullable = nullable
|
|
709
700
|
self.metadata = metadata or {}
|
|
710
701
|
|
|
711
|
-
def simpleString(self) -> str:
|
|
712
|
-
return "
|
|
702
|
+
def simpleString(self) -> str: # noqa: D102
|
|
703
|
+
return f"{self.name}:{self.dataType.simpleString()}"
|
|
713
704
|
|
|
714
|
-
def __repr__(self) -> str:
|
|
715
|
-
return "StructField('
|
|
716
|
-
self.name,
|
|
717
|
-
self.dataType,
|
|
718
|
-
str(self.nullable),
|
|
719
|
-
)
|
|
705
|
+
def __repr__(self) -> str: # noqa: D105
|
|
706
|
+
return f"StructField('{self.name}', {self.dataType}, {self.nullable!s})"
|
|
720
707
|
|
|
721
|
-
def needConversion(self) -> bool:
|
|
708
|
+
def needConversion(self) -> bool: # noqa: D102
|
|
722
709
|
return self.dataType.needConversion()
|
|
723
710
|
|
|
724
|
-
def toInternal(self, obj: T) -> T:
|
|
711
|
+
def toInternal(self, obj: T) -> T: # noqa: D102
|
|
725
712
|
return self.dataType.toInternal(obj)
|
|
726
713
|
|
|
727
|
-
def fromInternal(self, obj: T) -> T:
|
|
714
|
+
def fromInternal(self, obj: T) -> T: # noqa: D102
|
|
728
715
|
return self.dataType.fromInternal(obj)
|
|
729
716
|
|
|
730
|
-
def typeName(self) -> str: # type: ignore[override]
|
|
731
|
-
|
|
717
|
+
def typeName(self) -> str: # type: ignore[override] # noqa: D102
|
|
718
|
+
msg = "StructField does not have typeName. Use typeName on its type explicitly instead."
|
|
719
|
+
raise TypeError(msg)
|
|
732
720
|
|
|
733
721
|
|
|
734
722
|
class StructType(DataType):
|
|
735
|
-
"""Struct type, consisting of a list of :class:`StructField`.
|
|
723
|
+
r"""Struct type, consisting of a list of :class:`StructField`.
|
|
736
724
|
|
|
737
725
|
This is the data type representing a :class:`Row`.
|
|
738
726
|
|
|
739
727
|
Iterating a :class:`StructType` will iterate over its :class:`StructField`\\s.
|
|
740
728
|
A contained :class:`StructField` can be accessed by its name or position.
|
|
741
729
|
|
|
742
|
-
Examples
|
|
730
|
+
Examples:
|
|
743
731
|
--------
|
|
744
732
|
>>> struct1 = StructType([StructField("f1", StringType(), True)])
|
|
745
733
|
>>> struct1["f1"]
|
|
@@ -752,16 +740,17 @@ class StructType(DataType):
|
|
|
752
740
|
>>> struct1 == struct2
|
|
753
741
|
True
|
|
754
742
|
>>> struct1 = StructType([StructField("f1", StringType(), True)])
|
|
755
|
-
>>> struct2 = StructType(
|
|
756
|
-
... StructField("f2", IntegerType(), False)]
|
|
743
|
+
>>> struct2 = StructType(
|
|
744
|
+
... [StructField("f1", StringType(), True), StructField("f2", IntegerType(), False)]
|
|
745
|
+
... )
|
|
757
746
|
>>> struct1 == struct2
|
|
758
747
|
False
|
|
759
748
|
"""
|
|
760
749
|
|
|
761
|
-
def _update_internal_duckdb_type(self):
|
|
750
|
+
def _update_internal_duckdb_type(self) -> None:
|
|
762
751
|
self.duckdb_type = duckdb.struct_type(dict(zip(self.names, [x.duckdb_type for x in self.fields])))
|
|
763
752
|
|
|
764
|
-
def __init__(self, fields: Optional[
|
|
753
|
+
def __init__(self, fields: Optional[list[StructField]] = None) -> None: # noqa: D107
|
|
765
754
|
if not fields:
|
|
766
755
|
self.fields = []
|
|
767
756
|
self.names = []
|
|
@@ -780,23 +769,20 @@ class StructType(DataType):
|
|
|
780
769
|
field: str,
|
|
781
770
|
data_type: Union[str, DataType],
|
|
782
771
|
nullable: bool = True,
|
|
783
|
-
metadata: Optional[
|
|
784
|
-
) -> "StructType":
|
|
785
|
-
...
|
|
772
|
+
metadata: Optional[dict[str, Any]] = None,
|
|
773
|
+
) -> "StructType": ...
|
|
786
774
|
|
|
787
775
|
@overload
|
|
788
|
-
def add(self, field: StructField) -> "StructType":
|
|
789
|
-
...
|
|
776
|
+
def add(self, field: StructField) -> "StructType": ...
|
|
790
777
|
|
|
791
778
|
def add(
|
|
792
779
|
self,
|
|
793
780
|
field: Union[str, StructField],
|
|
794
781
|
data_type: Optional[Union[str, DataType]] = None,
|
|
795
782
|
nullable: bool = True,
|
|
796
|
-
metadata: Optional[
|
|
783
|
+
metadata: Optional[dict[str, Any]] = None,
|
|
797
784
|
) -> "StructType":
|
|
798
|
-
"""
|
|
799
|
-
Construct a :class:`StructType` by adding new elements to it, to define the schema.
|
|
785
|
+
r"""Construct a :class:`StructType` by adding new elements to it, to define the schema.
|
|
800
786
|
The method accepts either:
|
|
801
787
|
|
|
802
788
|
a) A single parameter which is a :class:`StructField` object.
|
|
@@ -815,11 +801,11 @@ class StructType(DataType):
|
|
|
815
801
|
metadata : dict, optional
|
|
816
802
|
Any additional metadata (default None)
|
|
817
803
|
|
|
818
|
-
Returns
|
|
804
|
+
Returns:
|
|
819
805
|
-------
|
|
820
806
|
:class:`StructType`
|
|
821
807
|
|
|
822
|
-
Examples
|
|
808
|
+
Examples:
|
|
823
809
|
--------
|
|
824
810
|
>>> struct1 = StructType().add("f1", StringType(), True).add("f2", StringType(), True, None)
|
|
825
811
|
>>> struct2 = StructType([StructField("f1", StringType(), True), \\
|
|
@@ -834,13 +820,14 @@ class StructType(DataType):
|
|
|
834
820
|
>>> struct2 = StructType([StructField("f1", StringType(), True)])
|
|
835
821
|
>>> struct1 == struct2
|
|
836
822
|
True
|
|
837
|
-
"""
|
|
823
|
+
""" # noqa: D205, D415
|
|
838
824
|
if isinstance(field, StructField):
|
|
839
825
|
self.fields.append(field)
|
|
840
826
|
self.names.append(field.name)
|
|
841
827
|
else:
|
|
842
828
|
if isinstance(field, str) and data_type is None:
|
|
843
|
-
|
|
829
|
+
msg = "Must specify DataType if passing name of struct_field to create."
|
|
830
|
+
raise ValueError(msg)
|
|
844
831
|
else:
|
|
845
832
|
data_type_f = data_type
|
|
846
833
|
self.fields.append(StructField(field, data_type_f, nullable, metadata))
|
|
@@ -852,7 +839,7 @@ class StructType(DataType):
|
|
|
852
839
|
return self
|
|
853
840
|
|
|
854
841
|
def __iter__(self) -> Iterator[StructField]:
|
|
855
|
-
"""Iterate the fields"""
|
|
842
|
+
"""Iterate the fields."""
|
|
856
843
|
return iter(self.fields)
|
|
857
844
|
|
|
858
845
|
def __len__(self) -> int:
|
|
@@ -865,24 +852,30 @@ class StructType(DataType):
|
|
|
865
852
|
for field in self:
|
|
866
853
|
if field.name == key:
|
|
867
854
|
return field
|
|
868
|
-
|
|
855
|
+
msg = f"No StructField named {key}"
|
|
856
|
+
raise KeyError(msg)
|
|
869
857
|
elif isinstance(key, int):
|
|
870
858
|
try:
|
|
871
859
|
return self.fields[key]
|
|
872
860
|
except IndexError:
|
|
873
|
-
|
|
861
|
+
msg = "StructType index out of range"
|
|
862
|
+
raise IndexError(msg) # noqa: B904
|
|
874
863
|
elif isinstance(key, slice):
|
|
875
864
|
return StructType(self.fields[key])
|
|
876
865
|
else:
|
|
877
|
-
|
|
866
|
+
msg = "StructType keys should be strings, integers or slices"
|
|
867
|
+
raise TypeError(msg)
|
|
878
868
|
|
|
879
|
-
def simpleString(self) -> str:
|
|
880
|
-
return "struct
|
|
869
|
+
def simpleString(self) -> str: # noqa: D102
|
|
870
|
+
return "struct<{}>".format(",".join(f.simpleString() for f in self))
|
|
881
871
|
|
|
882
|
-
def __repr__(self) -> str:
|
|
883
|
-
return "StructType([
|
|
872
|
+
def __repr__(self) -> str: # noqa: D105
|
|
873
|
+
return "StructType([{}])".format(", ".join(str(field) for field in self))
|
|
884
874
|
|
|
885
|
-
def
|
|
875
|
+
def __contains__(self, item: str) -> bool: # noqa: D105
|
|
876
|
+
return item in self.names
|
|
877
|
+
|
|
878
|
+
def extract_types_and_names(self) -> tuple[list[str], list[str]]: # noqa: D102
|
|
886
879
|
names = []
|
|
887
880
|
types = []
|
|
888
881
|
for f in self.fields:
|
|
@@ -890,11 +883,10 @@ class StructType(DataType):
|
|
|
890
883
|
names.append(f.name)
|
|
891
884
|
return (types, names)
|
|
892
885
|
|
|
893
|
-
def fieldNames(self) ->
|
|
894
|
-
"""
|
|
895
|
-
Returns all field names in a list.
|
|
886
|
+
def fieldNames(self) -> list[str]:
|
|
887
|
+
"""Returns all field names in a list.
|
|
896
888
|
|
|
897
|
-
Examples
|
|
889
|
+
Examples:
|
|
898
890
|
--------
|
|
899
891
|
>>> struct = StructType([StructField("f1", StringType(), True)])
|
|
900
892
|
>>> struct.fieldNames()
|
|
@@ -902,11 +894,11 @@ class StructType(DataType):
|
|
|
902
894
|
"""
|
|
903
895
|
return list(self.names)
|
|
904
896
|
|
|
905
|
-
def needConversion(self) -> bool:
|
|
897
|
+
def needConversion(self) -> bool: # noqa: D102
|
|
906
898
|
# We need convert Row()/namedtuple into tuple()
|
|
907
899
|
return True
|
|
908
900
|
|
|
909
|
-
def toInternal(self, obj:
|
|
901
|
+
def toInternal(self, obj: tuple) -> tuple: # noqa: D102
|
|
910
902
|
if obj is None:
|
|
911
903
|
return
|
|
912
904
|
|
|
@@ -926,7 +918,8 @@ class StructType(DataType):
|
|
|
926
918
|
for n, f, c in zip(self.names, self.fields, self._needConversion)
|
|
927
919
|
)
|
|
928
920
|
else:
|
|
929
|
-
|
|
921
|
+
msg = f"Unexpected tuple {obj!r} with StructType"
|
|
922
|
+
raise ValueError(msg)
|
|
930
923
|
else:
|
|
931
924
|
if isinstance(obj, dict):
|
|
932
925
|
return tuple(obj.get(n) for n in self.names)
|
|
@@ -936,16 +929,17 @@ class StructType(DataType):
|
|
|
936
929
|
d = obj.__dict__
|
|
937
930
|
return tuple(d.get(n) for n in self.names)
|
|
938
931
|
else:
|
|
939
|
-
|
|
932
|
+
msg = f"Unexpected tuple {obj!r} with StructType"
|
|
933
|
+
raise ValueError(msg)
|
|
940
934
|
|
|
941
|
-
def fromInternal(self, obj:
|
|
935
|
+
def fromInternal(self, obj: tuple) -> "Row": # noqa: D102
|
|
942
936
|
if obj is None:
|
|
943
937
|
return
|
|
944
938
|
if isinstance(obj, Row):
|
|
945
939
|
# it's already converted by pickler
|
|
946
940
|
return obj
|
|
947
941
|
|
|
948
|
-
values: Union[
|
|
942
|
+
values: Union[tuple, list]
|
|
949
943
|
if self._needSerializeAnyField:
|
|
950
944
|
# Only calling fromInternal function for fields that need conversion
|
|
951
945
|
values = [f.fromInternal(v) if c else v for f, v, c in zip(self.fields, obj, self._needConversion)]
|
|
@@ -955,7 +949,7 @@ class StructType(DataType):
|
|
|
955
949
|
|
|
956
950
|
|
|
957
951
|
class UnionType(DataType):
|
|
958
|
-
def __init__(self):
|
|
952
|
+
def __init__(self) -> None:
|
|
959
953
|
raise ContributionsAcceptedError
|
|
960
954
|
|
|
961
955
|
|
|
@@ -965,7 +959,7 @@ class UserDefinedType(DataType):
|
|
|
965
959
|
.. note:: WARN: Spark Internal Use Only
|
|
966
960
|
"""
|
|
967
961
|
|
|
968
|
-
def __init__(self):
|
|
962
|
+
def __init__(self) -> None:
|
|
969
963
|
raise ContributionsAcceptedError
|
|
970
964
|
|
|
971
965
|
@classmethod
|
|
@@ -974,24 +968,21 @@ class UserDefinedType(DataType):
|
|
|
974
968
|
|
|
975
969
|
@classmethod
|
|
976
970
|
def sqlType(cls) -> DataType:
|
|
977
|
-
"""
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
raise NotImplementedError("UDT must implement sqlType().")
|
|
971
|
+
"""Underlying SQL storage type for this UDT."""
|
|
972
|
+
msg = "UDT must implement sqlType()."
|
|
973
|
+
raise NotImplementedError(msg)
|
|
981
974
|
|
|
982
975
|
@classmethod
|
|
983
976
|
def module(cls) -> str:
|
|
984
|
-
"""
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
raise NotImplementedError("UDT must implement module().")
|
|
977
|
+
"""The Python module of the UDT."""
|
|
978
|
+
msg = "UDT must implement module()."
|
|
979
|
+
raise NotImplementedError(msg)
|
|
988
980
|
|
|
989
981
|
@classmethod
|
|
990
982
|
def scalaUDT(cls) -> str:
|
|
991
|
-
"""
|
|
992
|
-
The class name of the paired Scala UDT (could be '', if there
|
|
983
|
+
"""The class name of the paired Scala UDT (could be '', if there
|
|
993
984
|
is no corresponding one).
|
|
994
|
-
"""
|
|
985
|
+
""" # noqa: D205
|
|
995
986
|
return ""
|
|
996
987
|
|
|
997
988
|
def needConversion(self) -> bool:
|
|
@@ -999,42 +990,38 @@ class UserDefinedType(DataType):
|
|
|
999
990
|
|
|
1000
991
|
@classmethod
|
|
1001
992
|
def _cachedSqlType(cls) -> DataType:
|
|
1002
|
-
"""
|
|
1003
|
-
Cache the sqlType() into class, because it's heavily used in `toInternal`.
|
|
1004
|
-
"""
|
|
993
|
+
"""Cache the sqlType() into class, because it's heavily used in `toInternal`."""
|
|
1005
994
|
if not hasattr(cls, "_cached_sql_type"):
|
|
1006
995
|
cls._cached_sql_type = cls.sqlType() # type: ignore[attr-defined]
|
|
1007
996
|
return cls._cached_sql_type # type: ignore[attr-defined]
|
|
1008
997
|
|
|
1009
|
-
def toInternal(self, obj: Any) -> Any:
|
|
998
|
+
def toInternal(self, obj: Any) -> Any: # noqa: ANN401
|
|
1010
999
|
if obj is not None:
|
|
1011
1000
|
return self._cachedSqlType().toInternal(self.serialize(obj))
|
|
1012
1001
|
|
|
1013
|
-
def fromInternal(self, obj: Any) -> Any:
|
|
1002
|
+
def fromInternal(self, obj: Any) -> Any: # noqa: ANN401
|
|
1014
1003
|
v = self._cachedSqlType().fromInternal(obj)
|
|
1015
1004
|
if v is not None:
|
|
1016
1005
|
return self.deserialize(v)
|
|
1017
1006
|
|
|
1018
|
-
def serialize(self, obj: Any) ->
|
|
1019
|
-
"""
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
raise NotImplementedError("UDT must implement toInternal().")
|
|
1007
|
+
def serialize(self, obj: Any) -> NoReturn: # noqa: ANN401
|
|
1008
|
+
"""Converts a user-type object into a SQL datum."""
|
|
1009
|
+
msg = "UDT must implement toInternal()."
|
|
1010
|
+
raise NotImplementedError(msg)
|
|
1023
1011
|
|
|
1024
|
-
def deserialize(self, datum: Any) ->
|
|
1025
|
-
"""
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
raise NotImplementedError("UDT must implement fromInternal().")
|
|
1012
|
+
def deserialize(self, datum: Any) -> NoReturn: # noqa: ANN401
|
|
1013
|
+
"""Converts a SQL datum into a user-type object."""
|
|
1014
|
+
msg = "UDT must implement fromInternal()."
|
|
1015
|
+
raise NotImplementedError(msg)
|
|
1029
1016
|
|
|
1030
1017
|
def simpleString(self) -> str:
|
|
1031
1018
|
return "udt"
|
|
1032
1019
|
|
|
1033
|
-
def __eq__(self, other:
|
|
1034
|
-
return type(self)
|
|
1020
|
+
def __eq__(self, other: object) -> bool:
|
|
1021
|
+
return type(self) is type(other)
|
|
1035
1022
|
|
|
1036
1023
|
|
|
1037
|
-
_atomic_types:
|
|
1024
|
+
_atomic_types: list[type[DataType]] = [
|
|
1038
1025
|
StringType,
|
|
1039
1026
|
BinaryType,
|
|
1040
1027
|
BooleanType,
|
|
@@ -1050,33 +1037,28 @@ _atomic_types: List[Type[DataType]] = [
|
|
|
1050
1037
|
TimestampNTZType,
|
|
1051
1038
|
NullType,
|
|
1052
1039
|
]
|
|
1053
|
-
_all_atomic_types:
|
|
1040
|
+
_all_atomic_types: dict[str, type[DataType]] = {t.typeName(): t for t in _atomic_types}
|
|
1054
1041
|
|
|
1055
|
-
_complex_types:
|
|
1042
|
+
_complex_types: list[type[Union[ArrayType, MapType, StructType]]] = [
|
|
1056
1043
|
ArrayType,
|
|
1057
1044
|
MapType,
|
|
1058
1045
|
StructType,
|
|
1059
1046
|
]
|
|
1060
|
-
_all_complex_types:
|
|
1061
|
-
(v.typeName(), v) for v in _complex_types
|
|
1062
|
-
)
|
|
1047
|
+
_all_complex_types: dict[str, type[Union[ArrayType, MapType, StructType]]] = {v.typeName(): v for v in _complex_types}
|
|
1063
1048
|
|
|
1064
|
-
import re
|
|
1065
1049
|
|
|
1066
1050
|
_FIXED_DECIMAL = re.compile(r"decimal\(\s*(\d+)\s*,\s*(-?\d+)\s*\)")
|
|
1067
1051
|
_INTERVAL_DAYTIME = re.compile(r"interval (day|hour|minute|second)( to (day|hour|minute|second))?")
|
|
1068
1052
|
|
|
1069
1053
|
|
|
1070
|
-
def _create_row(fields: Union["Row",
|
|
1054
|
+
def _create_row(fields: Union["Row", list[str]], values: Union[tuple[Any, ...], list[Any]]) -> "Row":
|
|
1071
1055
|
row = Row(*values)
|
|
1072
1056
|
row.__fields__ = fields
|
|
1073
1057
|
return row
|
|
1074
1058
|
|
|
1075
1059
|
|
|
1076
1060
|
class Row(tuple):
|
|
1077
|
-
|
|
1078
|
-
"""
|
|
1079
|
-
A row in :class:`DataFrame`.
|
|
1061
|
+
"""A row in :class:`DataFrame`.
|
|
1080
1062
|
The fields in it can be accessed:
|
|
1081
1063
|
|
|
1082
1064
|
* like attributes (``row.key``)
|
|
@@ -1093,18 +1075,18 @@ class Row(tuple):
|
|
|
1093
1075
|
field names sorted alphabetically and will be ordered in the position as
|
|
1094
1076
|
entered.
|
|
1095
1077
|
|
|
1096
|
-
Examples
|
|
1078
|
+
Examples:
|
|
1097
1079
|
--------
|
|
1098
1080
|
>>> row = Row(name="Alice", age=11)
|
|
1099
1081
|
>>> row
|
|
1100
1082
|
Row(name='Alice', age=11)
|
|
1101
|
-
>>> row[
|
|
1083
|
+
>>> row["name"], row["age"]
|
|
1102
1084
|
('Alice', 11)
|
|
1103
1085
|
>>> row.name, row.age
|
|
1104
1086
|
('Alice', 11)
|
|
1105
|
-
>>>
|
|
1087
|
+
>>> "name" in row
|
|
1106
1088
|
True
|
|
1107
|
-
>>>
|
|
1089
|
+
>>> "wrong_key" in row
|
|
1108
1090
|
False
|
|
1109
1091
|
|
|
1110
1092
|
Row also can be used to create another Row like class, then it
|
|
@@ -1113,9 +1095,9 @@ class Row(tuple):
|
|
|
1113
1095
|
>>> Person = Row("name", "age")
|
|
1114
1096
|
>>> Person
|
|
1115
1097
|
<Row('name', 'age')>
|
|
1116
|
-
>>>
|
|
1098
|
+
>>> "name" in Person
|
|
1117
1099
|
True
|
|
1118
|
-
>>>
|
|
1100
|
+
>>> "wrong_key" in Person
|
|
1119
1101
|
False
|
|
1120
1102
|
>>> Person("Alice", 11)
|
|
1121
1103
|
Row(name='Alice', age=11)
|
|
@@ -1127,19 +1109,18 @@ class Row(tuple):
|
|
|
1127
1109
|
>>> row2 = Row(name="Alice", age=11)
|
|
1128
1110
|
>>> row1 == row2
|
|
1129
1111
|
True
|
|
1130
|
-
"""
|
|
1112
|
+
""" # noqa: D205, D415
|
|
1131
1113
|
|
|
1132
1114
|
@overload
|
|
1133
|
-
def __new__(cls, *args: str) -> "Row":
|
|
1134
|
-
...
|
|
1115
|
+
def __new__(cls, *args: str) -> "Row": ...
|
|
1135
1116
|
|
|
1136
1117
|
@overload
|
|
1137
|
-
def __new__(cls, **kwargs: Any) -> "Row":
|
|
1138
|
-
...
|
|
1118
|
+
def __new__(cls, **kwargs: Any) -> "Row": ... # noqa: ANN401
|
|
1139
1119
|
|
|
1140
|
-
def __new__(cls, *args: Optional[str], **kwargs: Optional[Any]) -> "Row":
|
|
1120
|
+
def __new__(cls, *args: Optional[str], **kwargs: Optional[Any]) -> "Row": # noqa: D102
|
|
1141
1121
|
if args and kwargs:
|
|
1142
|
-
|
|
1122
|
+
msg = "Can not use both args and kwargs to create Row"
|
|
1123
|
+
raise ValueError(msg)
|
|
1143
1124
|
if kwargs:
|
|
1144
1125
|
# create row objects
|
|
1145
1126
|
row = tuple.__new__(cls, list(kwargs.values()))
|
|
@@ -1149,16 +1130,15 @@ class Row(tuple):
|
|
|
1149
1130
|
# create row class or objects
|
|
1150
1131
|
return tuple.__new__(cls, args)
|
|
1151
1132
|
|
|
1152
|
-
def asDict(self, recursive: bool = False) ->
|
|
1153
|
-
"""
|
|
1154
|
-
Return as a dict
|
|
1133
|
+
def asDict(self, recursive: bool = False) -> dict[str, Any]:
|
|
1134
|
+
"""Return as a dict.
|
|
1155
1135
|
|
|
1156
1136
|
Parameters
|
|
1157
1137
|
----------
|
|
1158
1138
|
recursive : bool, optional
|
|
1159
1139
|
turns the nested Rows to dict (default: False).
|
|
1160
1140
|
|
|
1161
|
-
Notes
|
|
1141
|
+
Notes:
|
|
1162
1142
|
-----
|
|
1163
1143
|
If a row contains duplicate field names, e.g., the rows of a join
|
|
1164
1144
|
between two :class:`DataFrame` that both have the fields of same names,
|
|
@@ -1166,28 +1146,29 @@ class Row(tuple):
|
|
|
1166
1146
|
will also return one of the duplicate fields, however returned value might
|
|
1167
1147
|
be different to ``asDict``.
|
|
1168
1148
|
|
|
1169
|
-
Examples
|
|
1149
|
+
Examples:
|
|
1170
1150
|
--------
|
|
1171
|
-
>>> Row(name="Alice", age=11).asDict() == {
|
|
1151
|
+
>>> Row(name="Alice", age=11).asDict() == {"name": "Alice", "age": 11}
|
|
1172
1152
|
True
|
|
1173
|
-
>>> row = Row(key=1, value=Row(name=
|
|
1174
|
-
>>> row.asDict() == {
|
|
1153
|
+
>>> row = Row(key=1, value=Row(name="a", age=2))
|
|
1154
|
+
>>> row.asDict() == {"key": 1, "value": Row(name="a", age=2)}
|
|
1175
1155
|
True
|
|
1176
|
-
>>> row.asDict(True) == {
|
|
1156
|
+
>>> row.asDict(True) == {"key": 1, "value": {"name": "a", "age": 2}}
|
|
1177
1157
|
True
|
|
1178
1158
|
"""
|
|
1179
1159
|
if not hasattr(self, "__fields__"):
|
|
1180
|
-
|
|
1160
|
+
msg = "Cannot convert a Row class into dict"
|
|
1161
|
+
raise TypeError(msg)
|
|
1181
1162
|
|
|
1182
1163
|
if recursive:
|
|
1183
1164
|
|
|
1184
|
-
def conv(obj:
|
|
1165
|
+
def conv(obj: Union[Row, list, dict, object]) -> Union[list, dict, object]:
|
|
1185
1166
|
if isinstance(obj, Row):
|
|
1186
1167
|
return obj.asDict(True)
|
|
1187
1168
|
elif isinstance(obj, list):
|
|
1188
1169
|
return [conv(o) for o in obj]
|
|
1189
1170
|
elif isinstance(obj, dict):
|
|
1190
|
-
return
|
|
1171
|
+
return {k: conv(v) for k, v in obj.items()}
|
|
1191
1172
|
else:
|
|
1192
1173
|
return obj
|
|
1193
1174
|
|
|
@@ -1195,35 +1176,34 @@ class Row(tuple):
|
|
|
1195
1176
|
else:
|
|
1196
1177
|
return dict(zip(self.__fields__, self))
|
|
1197
1178
|
|
|
1198
|
-
def __contains__(self, item: Any) -> bool:
|
|
1179
|
+
def __contains__(self, item: Any) -> bool: # noqa: D105, ANN401
|
|
1199
1180
|
if hasattr(self, "__fields__"):
|
|
1200
1181
|
return item in self.__fields__
|
|
1201
1182
|
else:
|
|
1202
|
-
return super(
|
|
1183
|
+
return super().__contains__(item)
|
|
1203
1184
|
|
|
1204
1185
|
# let object acts like class
|
|
1205
|
-
def __call__(self, *args: Any) -> "Row":
|
|
1206
|
-
"""
|
|
1186
|
+
def __call__(self, *args: Any) -> "Row": # noqa: ANN401
|
|
1187
|
+
"""Create new Row object."""
|
|
1207
1188
|
if len(args) > len(self):
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
)
|
|
1189
|
+
msg = f"Can not create Row with fields {self}, expected {len(self):d} values but got {args}"
|
|
1190
|
+
raise ValueError(msg)
|
|
1211
1191
|
return _create_row(self, args)
|
|
1212
1192
|
|
|
1213
|
-
def __getitem__(self, item: Any) -> Any:
|
|
1193
|
+
def __getitem__(self, item: Any) -> Any: # noqa: D105, ANN401
|
|
1214
1194
|
if isinstance(item, (int, slice)):
|
|
1215
|
-
return super(
|
|
1195
|
+
return super().__getitem__(item)
|
|
1216
1196
|
try:
|
|
1217
1197
|
# it will be slow when it has many fields,
|
|
1218
1198
|
# but this will not be used in normal cases
|
|
1219
1199
|
idx = self.__fields__.index(item)
|
|
1220
|
-
return super(
|
|
1200
|
+
return super().__getitem__(idx)
|
|
1221
1201
|
except IndexError:
|
|
1222
|
-
raise KeyError(item)
|
|
1202
|
+
raise KeyError(item) # noqa: B904
|
|
1223
1203
|
except ValueError:
|
|
1224
|
-
raise ValueError(item)
|
|
1204
|
+
raise ValueError(item) # noqa: B904
|
|
1225
1205
|
|
|
1226
|
-
def __getattr__(self, item: str) -> Any:
|
|
1206
|
+
def __getattr__(self, item: str) -> Any: # noqa: D105, ANN401
|
|
1227
1207
|
if item.startswith("__"):
|
|
1228
1208
|
raise AttributeError(item)
|
|
1229
1209
|
try:
|
|
@@ -1232,18 +1212,19 @@ class Row(tuple):
|
|
|
1232
1212
|
idx = self.__fields__.index(item)
|
|
1233
1213
|
return self[idx]
|
|
1234
1214
|
except IndexError:
|
|
1235
|
-
raise AttributeError(item)
|
|
1215
|
+
raise AttributeError(item) # noqa: B904
|
|
1236
1216
|
except ValueError:
|
|
1237
|
-
raise AttributeError(item)
|
|
1217
|
+
raise AttributeError(item) # noqa: B904
|
|
1238
1218
|
|
|
1239
|
-
def __setattr__(self, key: Any, value: Any) -> None:
|
|
1219
|
+
def __setattr__(self, key: Any, value: Any) -> None: # noqa: D105, ANN401
|
|
1240
1220
|
if key != "__fields__":
|
|
1241
|
-
|
|
1221
|
+
msg = "Row is read-only"
|
|
1222
|
+
raise RuntimeError(msg)
|
|
1242
1223
|
self.__dict__[key] = value
|
|
1243
1224
|
|
|
1244
1225
|
def __reduce__(
|
|
1245
1226
|
self,
|
|
1246
|
-
) -> Union[str,
|
|
1227
|
+
) -> Union[str, tuple[Any, ...]]:
|
|
1247
1228
|
"""Returns a tuple so Python knows how to pickle Row."""
|
|
1248
1229
|
if hasattr(self, "__fields__"):
|
|
1249
1230
|
return (_create_row, (self.__fields__, tuple(self)))
|
|
@@ -1253,6 +1234,6 @@ class Row(tuple):
|
|
|
1253
1234
|
def __repr__(self) -> str:
|
|
1254
1235
|
"""Printable representation of Row used in Python REPL."""
|
|
1255
1236
|
if hasattr(self, "__fields__"):
|
|
1256
|
-
return "Row(
|
|
1237
|
+
return "Row({})".format(", ".join(f"{k}={v!r}" for k, v in zip(self.__fields__, tuple(self))))
|
|
1257
1238
|
else:
|
|
1258
|
-
return "<Row(
|
|
1239
|
+
return "<Row({})>".format(", ".join(f"{field!r}" for field in self))
|