duckdb-0.8.2.dev3007-cp311-cp311-win_amd64.whl → duckdb-1.4.3.dev8-cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. _duckdb-stubs/__init__.pyi +1478 -0
  2. _duckdb-stubs/_func.pyi +46 -0
  3. _duckdb-stubs/_sqltypes.pyi +75 -0
  4. duckdb/duckdb.cp311-win_amd64.pyd → _duckdb.cp311-win_amd64.pyd +0 -0
  5. adbc_driver_duckdb/__init__.py +10 -8
  6. adbc_driver_duckdb/dbapi.py +4 -5
  7. duckdb/__init__.py +250 -196
  8. duckdb/_dbapi_type_object.py +231 -0
  9. duckdb/_version.py +22 -0
  10. {pyduckdb → duckdb}/bytes_io_wrapper.py +12 -8
  11. duckdb/experimental/__init__.py +5 -0
  12. duckdb/experimental/spark/__init__.py +6 -0
  13. {pyduckdb → duckdb/experimental}/spark/_globals.py +8 -8
  14. duckdb/experimental/spark/_typing.py +46 -0
  15. duckdb/experimental/spark/conf.py +46 -0
  16. duckdb/experimental/spark/context.py +180 -0
  17. duckdb/experimental/spark/errors/__init__.py +70 -0
  18. duckdb/experimental/spark/errors/error_classes.py +918 -0
  19. duckdb/experimental/spark/errors/exceptions/__init__.py +16 -0
  20. duckdb/experimental/spark/errors/exceptions/base.py +168 -0
  21. duckdb/experimental/spark/errors/utils.py +111 -0
  22. duckdb/experimental/spark/exception.py +18 -0
  23. {pyduckdb → duckdb/experimental}/spark/sql/__init__.py +5 -5
  24. duckdb/experimental/spark/sql/_typing.py +86 -0
  25. duckdb/experimental/spark/sql/catalog.py +79 -0
  26. duckdb/experimental/spark/sql/column.py +361 -0
  27. duckdb/experimental/spark/sql/conf.py +24 -0
  28. duckdb/experimental/spark/sql/dataframe.py +1389 -0
  29. duckdb/experimental/spark/sql/functions.py +6195 -0
  30. duckdb/experimental/spark/sql/group.py +424 -0
  31. duckdb/experimental/spark/sql/readwriter.py +435 -0
  32. duckdb/experimental/spark/sql/session.py +297 -0
  33. duckdb/experimental/spark/sql/streaming.py +36 -0
  34. duckdb/experimental/spark/sql/type_utils.py +107 -0
  35. {pyduckdb → duckdb/experimental}/spark/sql/types.py +323 -342
  36. duckdb/experimental/spark/sql/udf.py +37 -0
  37. duckdb/filesystem.py +33 -0
  38. duckdb/func/__init__.py +3 -0
  39. duckdb/functional/__init__.py +12 -16
  40. duckdb/polars_io.py +284 -0
  41. duckdb/py.typed +0 -0
  42. duckdb/query_graph/__main__.py +358 -0
  43. duckdb/sqltypes/__init__.py +63 -0
  44. duckdb/typing/__init__.py +18 -6
  45. {pyduckdb → duckdb}/udf.py +10 -5
  46. duckdb/value/__init__.py +1 -0
  47. pyduckdb/value/constant.py → duckdb/value/constant/__init__.py +66 -57
  48. duckdb-1.4.3.dev8.dist-info/METADATA +88 -0
  49. duckdb-1.4.3.dev8.dist-info/RECORD +52 -0
  50. {duckdb-0.8.2.dev3007.dist-info → duckdb-1.4.3.dev8.dist-info}/WHEEL +1 -1
  51. duckdb-1.4.3.dev8.dist-info/licenses/LICENSE +7 -0
  52. duckdb-0.8.2.dev3007.dist-info/METADATA +0 -20
  53. duckdb-0.8.2.dev3007.dist-info/RECORD +0 -34
  54. duckdb-0.8.2.dev3007.dist-info/top_level.txt +0 -4
  55. duckdb-stubs/__init__.pyi +0 -574
  56. duckdb-stubs/functional/__init__.pyi +0 -33
  57. duckdb-stubs/typing/__init__.pyi +0 -35
  58. pyduckdb/__init__.py +0 -61
  59. pyduckdb/filesystem.py +0 -64
  60. pyduckdb/spark/__init__.py +0 -7
  61. pyduckdb/spark/conf.py +0 -45
  62. pyduckdb/spark/context.py +0 -162
  63. pyduckdb/spark/exception.py +0 -9
  64. pyduckdb/spark/sql/catalog.py +0 -78
  65. pyduckdb/spark/sql/conf.py +0 -23
  66. pyduckdb/spark/sql/dataframe.py +0 -75
  67. pyduckdb/spark/sql/readwriter.py +0 -180
  68. pyduckdb/spark/sql/session.py +0 -249
  69. pyduckdb/spark/sql/streaming.py +0 -37
  70. pyduckdb/spark/sql/type_utils.py +0 -104
  71. pyduckdb/spark/sql/udf.py +0 -9
  72. {pyduckdb → duckdb/experimental}/spark/LICENSE +0 -0
@@ -1,28 +1,28 @@
1
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'spark' folder.
1
+ # ruff: noqa: D100
2
+ # This code is based on code from Apache Spark under the license found in the LICENSE
3
+ # file located in the 'spark' folder.
2
4
 
5
+ import calendar
6
+ import datetime
7
+ import math
8
+ import re
9
+ import time
10
+ from builtins import tuple
11
+ from collections.abc import Iterator, Mapping
12
+ from types import MappingProxyType
3
13
  from typing import (
4
- cast,
5
- overload,
6
- Dict,
7
- Optional,
8
- List,
9
- Tuple,
10
14
  Any,
11
- Union,
12
- Type,
13
- TypeVar,
14
15
  ClassVar,
15
- Iterator,
16
- TYPE_CHECKING,
16
+ NoReturn,
17
+ Optional,
18
+ TypeVar,
19
+ Union,
20
+ cast,
21
+ overload,
17
22
  )
18
- from builtins import tuple
19
- import datetime
20
- import calendar
21
- import time
22
- import math
23
23
 
24
24
  import duckdb
25
- from duckdb.typing import DuckDBPyType
25
+ from duckdb.sqltypes import DuckDBPyType
26
26
 
27
27
  from ..exception import ContributionsAcceptedError
28
28
 
@@ -30,104 +30,100 @@ T = TypeVar("T")
30
30
  U = TypeVar("U")
31
31
 
32
32
  __all__ = [
33
- "DataType",
34
- "NullType",
35
- "StringType",
33
+ "ArrayType",
36
34
  "BinaryType",
37
- "UUIDType",
38
35
  "BitstringType",
39
36
  "BooleanType",
37
+ "ByteType",
38
+ "DataType",
40
39
  "DateType",
41
- "TimestampType",
42
- "TimestampNTZType",
43
- "TimestampNanosecondNTZType",
44
- "TimestampMilisecondNTZType",
45
- "TimestampSecondNTZType",
46
- "TimeType",
47
- "TimeNTZType",
40
+ "DayTimeIntervalType",
48
41
  "DecimalType",
49
42
  "DoubleType",
50
43
  "FloatType",
51
- "ByteType",
52
- "UnsignedByteType",
53
- "ShortType",
54
- "UnsignedShortType",
44
+ "HugeIntegerType",
55
45
  "IntegerType",
56
- "UnsignedIntegerType",
57
46
  "LongType",
58
- "UnsignedLongType",
59
- "HugeIntegerType",
60
- "DayTimeIntervalType",
61
- "Row",
62
- "ArrayType",
63
47
  "MapType",
48
+ "NullType",
49
+ "Row",
50
+ "ShortType",
51
+ "StringType",
64
52
  "StructField",
65
53
  "StructType",
54
+ "TimeNTZType",
55
+ "TimeType",
56
+ "TimestampMilisecondNTZType",
57
+ "TimestampNTZType",
58
+ "TimestampNanosecondNTZType",
59
+ "TimestampSecondNTZType",
60
+ "TimestampType",
61
+ "UUIDType",
62
+ "UnsignedByteType",
63
+ "UnsignedHugeIntegerType",
64
+ "UnsignedIntegerType",
65
+ "UnsignedLongType",
66
+ "UnsignedShortType",
66
67
  ]
67
68
 
68
69
 
69
70
  class DataType:
70
71
  """Base class for data types."""
71
72
 
72
- def __init__(self, duckdb_type):
73
+ def __init__(self, duckdb_type: DuckDBPyType) -> None: # noqa: D107
73
74
  self.duckdb_type = duckdb_type
74
75
 
75
- def __repr__(self) -> str:
76
+ def __repr__(self) -> str: # noqa: D105
76
77
  return self.__class__.__name__ + "()"
77
78
 
78
- def __hash__(self) -> int:
79
+ def __hash__(self) -> int: # noqa: D105
79
80
  return hash(str(self))
80
81
 
81
- def __eq__(self, other: Any) -> bool:
82
+ def __eq__(self, other: object) -> bool: # noqa: D105
82
83
  return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
83
84
 
84
- def __ne__(self, other: Any) -> bool:
85
+ def __ne__(self, other: object) -> bool: # noqa: D105
85
86
  return not self.__eq__(other)
86
87
 
87
88
  @classmethod
88
- def typeName(cls) -> str:
89
+ def typeName(cls) -> str: # noqa: D102
89
90
  return cls.__name__[:-4].lower()
90
91
 
91
- def simpleString(self) -> str:
92
+ def simpleString(self) -> str: # noqa: D102
92
93
  return self.typeName()
93
94
 
94
- def jsonValue(self) -> Union[str, Dict[str, Any]]:
95
+ def jsonValue(self) -> Union[str, dict[str, Any]]: # noqa: D102
95
96
  raise ContributionsAcceptedError
96
97
 
97
- def json(self) -> str:
98
+ def json(self) -> str: # noqa: D102
98
99
  raise ContributionsAcceptedError
99
100
 
100
101
  def needConversion(self) -> bool:
101
- """
102
- Does this type needs conversion between Python object and internal SQL object.
102
+ """Does this type needs conversion between Python object and internal SQL object.
103
103
 
104
104
  This is used to avoid the unnecessary conversion for ArrayType/MapType/StructType.
105
105
  """
106
106
  return False
107
107
 
108
- def toInternal(self, obj: Any) -> Any:
109
- """
110
- Converts a Python object into an internal SQL object.
111
- """
108
+ def toInternal(self, obj: Any) -> Any: # noqa: ANN401
109
+ """Converts a Python object into an internal SQL object."""
112
110
  return obj
113
111
 
114
- def fromInternal(self, obj: Any) -> Any:
115
- """
116
- Converts an internal SQL object into a native Python object.
117
- """
112
+ def fromInternal(self, obj: Any) -> Any: # noqa: ANN401
113
+ """Converts an internal SQL object into a native Python object."""
118
114
  return obj
119
115
 
120
116
 
121
117
  # This singleton pattern does not work with pickle, you will get
122
118
  # another object after pickle and unpickle
123
119
  class DataTypeSingleton(type):
124
- """Metaclass for DataType"""
120
+ """Metaclass for DataType."""
125
121
 
126
- _instances: ClassVar[Dict[Type["DataTypeSingleton"], "DataTypeSingleton"]] = {}
122
+ _instances: ClassVar[dict[type["DataTypeSingleton"], "DataTypeSingleton"]] = {}
127
123
 
128
- def __call__(cls: Type[T]) -> T: # type: ignore[override]
124
+ def __call__(cls: type[T]) -> T: # type: ignore[override]
129
125
  if cls not in cls._instances: # type: ignore[attr-defined]
130
- cls._instances[cls] = super(DataTypeSingleton, cls).__call__() # type: ignore[misc, attr-defined]
126
+ cls._instances[cls] = super().__call__() # type: ignore[misc, attr-defined]
131
127
  return cls._instances[cls] # type: ignore[attr-defined]
132
128
 
133
129
 
@@ -137,17 +133,18 @@ class NullType(DataType, metaclass=DataTypeSingleton):
137
133
  The data type representing None, used for the types that cannot be inferred.
138
134
  """
139
135
 
140
- def __init__(self):
136
+ def __init__(self) -> None: # noqa: D107
141
137
  super().__init__(DuckDBPyType("NULL"))
142
138
 
143
139
  @classmethod
144
- def typeName(cls) -> str:
140
+ def typeName(cls) -> str: # noqa: D102
145
141
  return "void"
146
142
 
147
143
 
148
144
  class AtomicType(DataType):
149
145
  """An internal type used to represent everything that is not
150
- null, UDTs, arrays, structs, and maps."""
146
+ null, UDTs, arrays, structs, and maps.
147
+ """ # noqa: D205
151
148
 
152
149
 
153
150
  class NumericType(AtomicType):
@@ -165,54 +162,54 @@ class FractionalType(NumericType):
165
162
  class StringType(AtomicType, metaclass=DataTypeSingleton):
166
163
  """String data type."""
167
164
 
168
- def __init__(self):
165
+ def __init__(self) -> None: # noqa: D107
169
166
  super().__init__(DuckDBPyType("VARCHAR"))
170
167
 
171
168
 
172
169
  class BitstringType(AtomicType, metaclass=DataTypeSingleton):
173
170
  """Bitstring data type."""
174
171
 
175
- def __init__(self):
172
+ def __init__(self) -> None: # noqa: D107
176
173
  super().__init__(DuckDBPyType("BIT"))
177
174
 
178
175
 
179
176
  class UUIDType(AtomicType, metaclass=DataTypeSingleton):
180
177
  """UUID data type."""
181
178
 
182
- def __init__(self):
179
+ def __init__(self) -> None: # noqa: D107
183
180
  super().__init__(DuckDBPyType("UUID"))
184
181
 
185
182
 
186
183
  class BinaryType(AtomicType, metaclass=DataTypeSingleton):
187
184
  """Binary (byte array) data type."""
188
185
 
189
- def __init__(self):
186
+ def __init__(self) -> None: # noqa: D107
190
187
  super().__init__(DuckDBPyType("BLOB"))
191
188
 
192
189
 
193
190
  class BooleanType(AtomicType, metaclass=DataTypeSingleton):
194
191
  """Boolean data type."""
195
192
 
196
- def __init__(self):
193
+ def __init__(self) -> None: # noqa: D107
197
194
  super().__init__(DuckDBPyType("BOOLEAN"))
198
195
 
199
196
 
200
197
  class DateType(AtomicType, metaclass=DataTypeSingleton):
201
198
  """Date (datetime.date) data type."""
202
199
 
203
- def __init__(self):
200
+ def __init__(self) -> None: # noqa: D107
204
201
  super().__init__(DuckDBPyType("DATE"))
205
202
 
206
203
  EPOCH_ORDINAL = datetime.datetime(1970, 1, 1).toordinal()
207
204
 
208
- def needConversion(self) -> bool:
205
+ def needConversion(self) -> bool: # noqa: D102
209
206
  return True
210
207
 
211
- def toInternal(self, d: datetime.date) -> int:
208
+ def toInternal(self, d: datetime.date) -> int: # noqa: D102
212
209
  if d is not None:
213
210
  return d.toordinal() - self.EPOCH_ORDINAL
214
211
 
215
- def fromInternal(self, v: int) -> datetime.date:
212
+ def fromInternal(self, v: int) -> datetime.date: # noqa: D102
216
213
  if v is not None:
217
214
  return datetime.date.fromordinal(v + self.EPOCH_ORDINAL)
218
215
 
@@ -220,22 +217,22 @@ class DateType(AtomicType, metaclass=DataTypeSingleton):
220
217
  class TimestampType(AtomicType, metaclass=DataTypeSingleton):
221
218
  """Timestamp (datetime.datetime) data type."""
222
219
 
223
- def __init__(self):
220
+ def __init__(self) -> None: # noqa: D107
224
221
  super().__init__(DuckDBPyType("TIMESTAMPTZ"))
225
222
 
226
223
  @classmethod
227
- def typeName(cls) -> str:
224
+ def typeName(cls) -> str: # noqa: D102
228
225
  return "timestamptz"
229
226
 
230
- def needConversion(self) -> bool:
227
+ def needConversion(self) -> bool: # noqa: D102
231
228
  return True
232
229
 
233
- def toInternal(self, dt: datetime.datetime) -> int:
230
+ def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
234
231
  if dt is not None:
235
232
  seconds = calendar.timegm(dt.utctimetuple()) if dt.tzinfo else time.mktime(dt.timetuple())
236
233
  return int(seconds) * 1000000 + dt.microsecond
237
234
 
238
- def fromInternal(self, ts: int) -> datetime.datetime:
235
+ def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
239
236
  if ts is not None:
240
237
  # using int to avoid precision loss in float
241
238
  return datetime.datetime.fromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000)
@@ -244,22 +241,22 @@ class TimestampType(AtomicType, metaclass=DataTypeSingleton):
244
241
  class TimestampNTZType(AtomicType, metaclass=DataTypeSingleton):
245
242
  """Timestamp (datetime.datetime) data type without timezone information with microsecond precision."""
246
243
 
247
- def __init__(self):
244
+ def __init__(self) -> None: # noqa: D107
248
245
  super().__init__(DuckDBPyType("TIMESTAMP"))
249
246
 
250
- def needConversion(self) -> bool:
247
+ def needConversion(self) -> bool: # noqa: D102
251
248
  return True
252
249
 
253
250
  @classmethod
254
- def typeName(cls) -> str:
251
+ def typeName(cls) -> str: # noqa: D102
255
252
  return "timestamp"
256
253
 
257
- def toInternal(self, dt: datetime.datetime) -> int:
254
+ def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
258
255
  if dt is not None:
259
256
  seconds = calendar.timegm(dt.timetuple())
260
257
  return int(seconds) * 1000000 + dt.microsecond
261
258
 
262
- def fromInternal(self, ts: int) -> datetime.datetime:
259
+ def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
263
260
  if ts is not None:
264
261
  # using int to avoid precision loss in float
265
262
  return datetime.datetime.utcfromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000)
@@ -268,60 +265,60 @@ class TimestampNTZType(AtomicType, metaclass=DataTypeSingleton):
268
265
  class TimestampSecondNTZType(AtomicType, metaclass=DataTypeSingleton):
269
266
  """Timestamp (datetime.datetime) data type without timezone information with second precision."""
270
267
 
271
- def __init__(self):
268
+ def __init__(self) -> None: # noqa: D107
272
269
  super().__init__(DuckDBPyType("TIMESTAMP_S"))
273
270
 
274
- def needConversion(self) -> bool:
271
+ def needConversion(self) -> bool: # noqa: D102
275
272
  return True
276
273
 
277
274
  @classmethod
278
- def typeName(cls) -> str:
275
+ def typeName(cls) -> str: # noqa: D102
279
276
  return "timestamp_s"
280
277
 
281
- def toInternal(self, dt: datetime.datetime) -> int:
278
+ def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
282
279
  raise ContributionsAcceptedError
283
280
 
284
- def fromInternal(self, ts: int) -> datetime.datetime:
281
+ def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
285
282
  raise ContributionsAcceptedError
286
283
 
287
284
 
288
285
  class TimestampMilisecondNTZType(AtomicType, metaclass=DataTypeSingleton):
289
286
  """Timestamp (datetime.datetime) data type without timezone information with milisecond precision."""
290
287
 
291
- def __init__(self):
288
+ def __init__(self) -> None: # noqa: D107
292
289
  super().__init__(DuckDBPyType("TIMESTAMP_MS"))
293
290
 
294
- def needConversion(self) -> bool:
291
+ def needConversion(self) -> bool: # noqa: D102
295
292
  return True
296
293
 
297
294
  @classmethod
298
- def typeName(cls) -> str:
295
+ def typeName(cls) -> str: # noqa: D102
299
296
  return "timestamp_ms"
300
297
 
301
- def toInternal(self, dt: datetime.datetime) -> int:
298
+ def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
302
299
  raise ContributionsAcceptedError
303
300
 
304
- def fromInternal(self, ts: int) -> datetime.datetime:
301
+ def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
305
302
  raise ContributionsAcceptedError
306
303
 
307
304
 
308
305
  class TimestampNanosecondNTZType(AtomicType, metaclass=DataTypeSingleton):
309
306
  """Timestamp (datetime.datetime) data type without timezone information with nanosecond precision."""
310
307
 
311
- def __init__(self):
308
+ def __init__(self) -> None: # noqa: D107
312
309
  super().__init__(DuckDBPyType("TIMESTAMP_NS"))
313
310
 
314
- def needConversion(self) -> bool:
311
+ def needConversion(self) -> bool: # noqa: D102
315
312
  return True
316
313
 
317
314
  @classmethod
318
- def typeName(cls) -> str:
315
+ def typeName(cls) -> str: # noqa: D102
319
316
  return "timestamp_ns"
320
317
 
321
- def toInternal(self, dt: datetime.datetime) -> int:
318
+ def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
322
319
  raise ContributionsAcceptedError
323
320
 
324
- def fromInternal(self, ts: int) -> datetime.datetime:
321
+ def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
325
322
  raise ContributionsAcceptedError
326
323
 
327
324
 
@@ -345,90 +342,90 @@ class DecimalType(FractionalType):
345
342
  the number of digits on right side of dot. (default: 0)
346
343
  """
347
344
 
348
- def __init__(self, precision: int = 10, scale: int = 0):
345
+ def __init__(self, precision: int = 10, scale: int = 0) -> None: # noqa: D107
349
346
  super().__init__(duckdb.decimal_type(precision, scale))
350
347
  self.precision = precision
351
348
  self.scale = scale
352
349
  self.hasPrecisionInfo = True # this is a public API
353
350
 
354
- def simpleString(self) -> str:
355
- return "decimal(%d,%d)" % (self.precision, self.scale)
351
+ def simpleString(self) -> str: # noqa: D102
352
+ return f"decimal({int(self.precision):d},{int(self.scale):d})"
356
353
 
357
- def __repr__(self) -> str:
358
- return "DecimalType(%d,%d)" % (self.precision, self.scale)
354
+ def __repr__(self) -> str: # noqa: D105
355
+ return f"DecimalType({int(self.precision):d},{int(self.scale):d})"
359
356
 
360
357
 
361
358
  class DoubleType(FractionalType, metaclass=DataTypeSingleton):
362
359
  """Double data type, representing double precision floats."""
363
360
 
364
- def __init__(self):
361
+ def __init__(self) -> None: # noqa: D107
365
362
  super().__init__(DuckDBPyType("DOUBLE"))
366
363
 
367
364
 
368
365
  class FloatType(FractionalType, metaclass=DataTypeSingleton):
369
366
  """Float data type, representing single precision floats."""
370
367
 
371
- def __init__(self):
368
+ def __init__(self) -> None: # noqa: D107
372
369
  super().__init__(DuckDBPyType("FLOAT"))
373
370
 
374
371
 
375
372
  class ByteType(IntegralType):
376
373
  """Byte data type, i.e. a signed integer in a single byte."""
377
374
 
378
- def __init__(self):
375
+ def __init__(self) -> None: # noqa: D107
379
376
  super().__init__(DuckDBPyType("TINYINT"))
380
377
 
381
- def simpleString(self) -> str:
378
+ def simpleString(self) -> str: # noqa: D102
382
379
  return "tinyint"
383
380
 
384
381
 
385
382
  class UnsignedByteType(IntegralType):
386
383
  """Unsigned byte data type, i.e. a unsigned integer in a single byte."""
387
384
 
388
- def __init__(self):
385
+ def __init__(self) -> None: # noqa: D107
389
386
  super().__init__(DuckDBPyType("UTINYINT"))
390
387
 
391
- def simpleString(self) -> str:
388
+ def simpleString(self) -> str: # noqa: D102
392
389
  return "utinyint"
393
390
 
394
391
 
395
392
  class ShortType(IntegralType):
396
393
  """Short data type, i.e. a signed 16-bit integer."""
397
394
 
398
- def __init__(self):
395
+ def __init__(self) -> None: # noqa: D107
399
396
  super().__init__(DuckDBPyType("SMALLINT"))
400
397
 
401
- def simpleString(self) -> str:
398
+ def simpleString(self) -> str: # noqa: D102
402
399
  return "smallint"
403
400
 
404
401
 
405
402
  class UnsignedShortType(IntegralType):
406
403
  """Unsigned short data type, i.e. a unsigned 16-bit integer."""
407
404
 
408
- def __init__(self):
405
+ def __init__(self) -> None: # noqa: D107
409
406
  super().__init__(DuckDBPyType("USMALLINT"))
410
407
 
411
- def simpleString(self) -> str:
408
+ def simpleString(self) -> str: # noqa: D102
412
409
  return "usmallint"
413
410
 
414
411
 
415
412
  class IntegerType(IntegralType):
416
413
  """Int data type, i.e. a signed 32-bit integer."""
417
414
 
418
- def __init__(self):
415
+ def __init__(self) -> None: # noqa: D107
419
416
  super().__init__(DuckDBPyType("INTEGER"))
420
417
 
421
- def simpleString(self) -> str:
418
+ def simpleString(self) -> str: # noqa: D102
422
419
  return "integer"
423
420
 
424
421
 
425
422
  class UnsignedIntegerType(IntegralType):
426
423
  """Unsigned int data type, i.e. a unsigned 32-bit integer."""
427
424
 
428
- def __init__(self):
425
+ def __init__(self) -> None: # noqa: D107
429
426
  super().__init__(DuckDBPyType("UINTEGER"))
430
427
 
431
- def simpleString(self) -> str:
428
+ def simpleString(self) -> str: # noqa: D102
432
429
  return "uinteger"
433
430
 
434
431
 
@@ -439,10 +436,10 @@ class LongType(IntegralType):
439
436
  please use :class:`DecimalType`.
440
437
  """
441
438
 
442
- def __init__(self):
439
+ def __init__(self) -> None: # noqa: D107
443
440
  super().__init__(DuckDBPyType("BIGINT"))
444
441
 
445
- def simpleString(self) -> str:
442
+ def simpleString(self) -> str: # noqa: D102
446
443
  return "bigint"
447
444
 
448
445
 
@@ -453,44 +450,58 @@ class UnsignedLongType(IntegralType):
453
450
  please use :class:`HugeIntegerType`.
454
451
  """
455
452
 
456
- def __init__(self):
453
+ def __init__(self) -> None: # noqa: D107
457
454
  super().__init__(DuckDBPyType("UBIGINT"))
458
455
 
459
- def simpleString(self) -> str:
456
+ def simpleString(self) -> str: # noqa: D102
460
457
  return "ubigint"
461
458
 
462
459
 
463
460
  class HugeIntegerType(IntegralType):
464
461
  """Huge integer data type, i.e. a signed 128-bit integer.
465
462
 
466
- If the values are beyond the range of [-170141183460469231731687303715884105727, 170141183460469231731687303715884105727],
467
- please use :class:`DecimalType`.
463
+ If the values are beyond the range of [-170141183460469231731687303715884105728,
464
+ 170141183460469231731687303715884105727], please use :class:`DecimalType`.
468
465
  """
469
466
 
470
- def __init__(self):
467
+ def __init__(self) -> None: # noqa: D107
471
468
  super().__init__(DuckDBPyType("HUGEINT"))
472
469
 
473
- def simpleString(self) -> str:
470
+ def simpleString(self) -> str: # noqa: D102
474
471
  return "hugeint"
475
472
 
476
473
 
474
+ class UnsignedHugeIntegerType(IntegralType):
475
+ """Unsigned huge integer data type, i.e. a unsigned 128-bit integer.
476
+
477
+ If the values are beyond the range of [0, 340282366920938463463374607431768211455],
478
+ please use :class:`DecimalType`.
479
+ """
480
+
481
+ def __init__(self) -> None: # noqa: D107
482
+ super().__init__(DuckDBPyType("UHUGEINT"))
483
+
484
+ def simpleString(self) -> str: # noqa: D102
485
+ return "uhugeint"
486
+
487
+
477
488
  class TimeType(IntegralType):
478
489
  """Time (datetime.time) data type."""
479
490
 
480
- def __init__(self):
491
+ def __init__(self) -> None: # noqa: D107
481
492
  super().__init__(DuckDBPyType("TIMETZ"))
482
493
 
483
- def simpleString(self) -> str:
494
+ def simpleString(self) -> str: # noqa: D102
484
495
  return "timetz"
485
496
 
486
497
 
487
498
  class TimeNTZType(IntegralType):
488
499
  """Time (datetime.time) data type without timezone information."""
489
500
 
490
- def __init__(self):
501
+ def __init__(self) -> None: # noqa: D107
491
502
  super().__init__(DuckDBPyType("TIME"))
492
503
 
493
- def simpleString(self) -> str:
504
+ def simpleString(self) -> str: # noqa: D102
494
505
  return "time"
495
506
 
496
507
 
@@ -502,16 +513,18 @@ class DayTimeIntervalType(AtomicType):
502
513
  MINUTE = 2
503
514
  SECOND = 3
504
515
 
505
- _fields = {
506
- DAY: "day",
507
- HOUR: "hour",
508
- MINUTE: "minute",
509
- SECOND: "second",
510
- }
516
+ _fields: Mapping[str, int] = MappingProxyType(
517
+ {
518
+ DAY: "day",
519
+ HOUR: "hour",
520
+ MINUTE: "minute",
521
+ SECOND: "second",
522
+ }
523
+ )
511
524
 
512
- _inverted_fields = dict(zip(_fields.values(), _fields.keys()))
525
+ _inverted_fields: Mapping[int, str] = MappingProxyType(dict(zip(_fields.values(), _fields.keys())))
513
526
 
514
- def __init__(self, startField: Optional[int] = None, endField: Optional[int] = None):
527
+ def __init__(self, startField: Optional[int] = None, endField: Optional[int] = None) -> None: # noqa: D107
515
528
  super().__init__(DuckDBPyType("INTERVAL"))
516
529
  if startField is None and endField is None:
517
530
  # Default matched to scala side.
@@ -521,33 +534,34 @@ class DayTimeIntervalType(AtomicType):
521
534
  endField = startField
522
535
 
523
536
  fields = DayTimeIntervalType._fields
524
- if startField not in fields.keys() or endField not in fields.keys():
525
- raise RuntimeError("interval %s to %s is invalid" % (startField, endField))
526
- self.startField = cast(int, startField)
527
- self.endField = cast(int, endField)
537
+ if startField not in fields or endField not in fields:
538
+ msg = f"interval {startField} to {endField} is invalid"
539
+ raise RuntimeError(msg)
540
+ self.startField = cast("int", startField)
541
+ self.endField = cast("int", endField)
528
542
 
529
543
  def _str_repr(self) -> str:
530
544
  fields = DayTimeIntervalType._fields
531
545
  start_field_name = fields[self.startField]
532
546
  end_field_name = fields[self.endField]
533
547
  if start_field_name == end_field_name:
534
- return "interval %s" % start_field_name
548
+ return f"interval {start_field_name}"
535
549
  else:
536
- return "interval %s to %s" % (start_field_name, end_field_name)
550
+ return f"interval {start_field_name} to {end_field_name}"
537
551
 
538
552
  simpleString = _str_repr
539
553
 
540
- def __repr__(self) -> str:
541
- return "%s(%d, %d)" % (type(self).__name__, self.startField, self.endField)
554
+ def __repr__(self) -> str: # noqa: D105
555
+ return f"{type(self).__name__}({int(self.startField):d}, {int(self.endField):d})"
542
556
 
543
- def needConversion(self) -> bool:
557
+ def needConversion(self) -> bool: # noqa: D102
544
558
  return True
545
559
 
546
- def toInternal(self, dt: datetime.timedelta) -> Optional[int]:
560
+ def toInternal(self, dt: datetime.timedelta) -> Optional[int]: # noqa: D102
547
561
  if dt is not None:
548
562
  return (math.floor(dt.total_seconds()) * 1000000) + dt.microseconds
549
563
 
550
- def fromInternal(self, micros: int) -> Optional[datetime.timedelta]:
564
+ def fromInternal(self, micros: int) -> Optional[datetime.timedelta]: # noqa: D102
551
565
  if micros is not None:
552
566
  return datetime.timedelta(microseconds=micros)
553
567
 
@@ -562,7 +576,7 @@ class ArrayType(DataType):
562
576
  containsNull : bool, optional
563
577
  whether the array can contain null (None) values.
564
578
 
565
- Examples
579
+ Examples:
566
580
  --------
567
581
  >>> ArrayType(StringType()) == ArrayType(StringType(), True)
568
582
  True
@@ -570,30 +584,27 @@ class ArrayType(DataType):
570
584
  False
571
585
  """
572
586
 
573
- def __init__(self, elementType: DataType, containsNull: bool = True):
587
+ def __init__(self, elementType: DataType, containsNull: bool = True) -> None: # noqa: D107
574
588
  super().__init__(duckdb.list_type(elementType.duckdb_type))
575
- assert isinstance(elementType, DataType), "elementType %s should be an instance of %s" % (
576
- elementType,
577
- DataType,
578
- )
589
+ assert isinstance(elementType, DataType), f"elementType {elementType} should be an instance of {DataType}"
579
590
  self.elementType = elementType
580
591
  self.containsNull = containsNull
581
592
 
582
- def simpleString(self) -> str:
583
- return "array<%s>" % self.elementType.simpleString()
593
+ def simpleString(self) -> str: # noqa: D102
594
+ return f"array<{self.elementType.simpleString()}>"
584
595
 
585
- def __repr__(self) -> str:
586
- return "ArrayType(%s, %s)" % (self.elementType, str(self.containsNull))
596
+ def __repr__(self) -> str: # noqa: D105
597
+ return f"ArrayType({self.elementType}, {self.containsNull!s})"
587
598
 
588
- def needConversion(self) -> bool:
599
+ def needConversion(self) -> bool: # noqa: D102
589
600
  return self.elementType.needConversion()
590
601
 
591
- def toInternal(self, obj: List[Optional[T]]) -> List[Optional[T]]:
602
+ def toInternal(self, obj: list[Optional[T]]) -> list[Optional[T]]: # noqa: D102
592
603
  if not self.needConversion():
593
604
  return obj
594
605
  return obj and [self.elementType.toInternal(v) for v in obj]
595
606
 
596
- def fromInternal(self, obj: List[Optional[T]]) -> List[Optional[T]]:
607
+ def fromInternal(self, obj: list[Optional[T]]) -> list[Optional[T]]: # noqa: D102
597
608
  if not self.needConversion():
598
609
  return obj
599
610
  return obj and [self.elementType.fromInternal(v) for v in obj]
@@ -611,59 +622,44 @@ class MapType(DataType):
611
622
  valueContainsNull : bool, optional
612
623
  indicates whether values can contain null (None) values.
613
624
 
614
- Notes
625
+ Notes:
615
626
  -----
616
627
  Keys in a map data type are not allowed to be null (None).
617
628
 
618
- Examples
629
+ Examples:
619
630
  --------
620
- >>> (MapType(StringType(), IntegerType())
621
- ... == MapType(StringType(), IntegerType(), True))
631
+ >>> (MapType(StringType(), IntegerType()) == MapType(StringType(), IntegerType(), True))
622
632
  True
623
- >>> (MapType(StringType(), IntegerType(), False)
624
- ... == MapType(StringType(), FloatType()))
633
+ >>> (MapType(StringType(), IntegerType(), False) == MapType(StringType(), FloatType()))
625
634
  False
626
635
  """
627
636
 
628
- def __init__(self, keyType: DataType, valueType: DataType, valueContainsNull: bool = True):
637
+ def __init__(self, keyType: DataType, valueType: DataType, valueContainsNull: bool = True) -> None: # noqa: D107
629
638
  super().__init__(duckdb.map_type(keyType.duckdb_type, valueType.duckdb_type))
630
- assert isinstance(keyType, DataType), "keyType %s should be an instance of %s" % (
631
- keyType,
632
- DataType,
633
- )
634
- assert isinstance(valueType, DataType), "valueType %s should be an instance of %s" % (
635
- valueType,
636
- DataType,
637
- )
639
+ assert isinstance(keyType, DataType), f"keyType {keyType} should be an instance of {DataType}"
640
+ assert isinstance(valueType, DataType), f"valueType {valueType} should be an instance of {DataType}"
638
641
  self.keyType = keyType
639
642
  self.valueType = valueType
640
643
  self.valueContainsNull = valueContainsNull
641
644
 
642
- def simpleString(self) -> str:
643
- return "map<%s,%s>" % (
644
- self.keyType.simpleString(),
645
- self.valueType.simpleString(),
646
- )
645
+ def simpleString(self) -> str: # noqa: D102
646
+ return f"map<{self.keyType.simpleString()},{self.valueType.simpleString()}>"
647
647
 
648
- def __repr__(self) -> str:
649
- return "MapType(%s, %s, %s)" % (
650
- self.keyType,
651
- self.valueType,
652
- str(self.valueContainsNull),
653
- )
648
+ def __repr__(self) -> str: # noqa: D105
649
+ return f"MapType({self.keyType}, {self.valueType}, {self.valueContainsNull!s})"
654
650
 
655
- def needConversion(self) -> bool:
651
+ def needConversion(self) -> bool: # noqa: D102
656
652
  return self.keyType.needConversion() or self.valueType.needConversion()
657
653
 
658
- def toInternal(self, obj: Dict[T, Optional[U]]) -> Dict[T, Optional[U]]:
654
+ def toInternal(self, obj: dict[T, Optional[U]]) -> dict[T, Optional[U]]: # noqa: D102
659
655
  if not self.needConversion():
660
656
  return obj
661
- return obj and dict((self.keyType.toInternal(k), self.valueType.toInternal(v)) for k, v in obj.items())
657
+ return obj and {self.keyType.toInternal(k): self.valueType.toInternal(v) for k, v in obj.items()}
662
658
 
663
- def fromInternal(self, obj: Dict[T, Optional[U]]) -> Dict[T, Optional[U]]:
659
+ def fromInternal(self, obj: dict[T, Optional[U]]) -> dict[T, Optional[U]]: # noqa: D102
664
660
  if not self.needConversion():
665
661
  return obj
666
- return obj and dict((self.keyType.fromInternal(k), self.valueType.fromInternal(v)) for k, v in obj.items())
662
+ return obj and {self.keyType.fromInternal(k): self.valueType.fromInternal(v) for k, v in obj.items()}
667
663
 
668
664
 
669
665
  class StructField(DataType):
@@ -680,66 +676,58 @@ class StructField(DataType):
680
676
  metadata : dict, optional
681
677
  a dict from string to simple type that can be toInternald to JSON automatically
682
678
 
683
- Examples
679
+ Examples:
684
680
  --------
685
- >>> (StructField("f1", StringType(), True)
686
- ... == StructField("f1", StringType(), True))
681
+ >>> (StructField("f1", StringType(), True) == StructField("f1", StringType(), True))
687
682
  True
688
- >>> (StructField("f1", StringType(), True)
689
- ... == StructField("f2", StringType(), True))
683
+ >>> (StructField("f1", StringType(), True) == StructField("f2", StringType(), True))
690
684
  False
691
685
  """
692
686
 
693
- def __init__(
687
+ def __init__( # noqa: D107
694
688
  self,
695
689
  name: str,
696
690
  dataType: DataType,
697
691
  nullable: bool = True,
698
- metadata: Optional[Dict[str, Any]] = None,
699
- ):
692
+ metadata: Optional[dict[str, Any]] = None,
693
+ ) -> None:
700
694
  super().__init__(dataType.duckdb_type)
701
- assert isinstance(dataType, DataType), "dataType %s should be an instance of %s" % (
702
- dataType,
703
- DataType,
704
- )
705
- assert isinstance(name, str), "field name %s should be a string" % (name)
695
+ assert isinstance(dataType, DataType), f"dataType {dataType} should be an instance of {DataType}"
696
+ assert isinstance(name, str), f"field name {name} should be a string"
706
697
  self.name = name
707
698
  self.dataType = dataType
708
699
  self.nullable = nullable
709
700
  self.metadata = metadata or {}
710
701
 
711
- def simpleString(self) -> str:
712
- return "%s:%s" % (self.name, self.dataType.simpleString())
702
+ def simpleString(self) -> str: # noqa: D102
703
+ return f"{self.name}:{self.dataType.simpleString()}"
713
704
 
714
- def __repr__(self) -> str:
715
- return "StructField('%s', %s, %s)" % (
716
- self.name,
717
- self.dataType,
718
- str(self.nullable),
719
- )
705
+ def __repr__(self) -> str: # noqa: D105
706
+ return f"StructField('{self.name}', {self.dataType}, {self.nullable!s})"
720
707
 
721
- def needConversion(self) -> bool:
708
+ def needConversion(self) -> bool: # noqa: D102
722
709
  return self.dataType.needConversion()
723
710
 
724
- def toInternal(self, obj: T) -> T:
711
+ def toInternal(self, obj: T) -> T: # noqa: D102
725
712
  return self.dataType.toInternal(obj)
726
713
 
727
- def fromInternal(self, obj: T) -> T:
714
+ def fromInternal(self, obj: T) -> T: # noqa: D102
728
715
  return self.dataType.fromInternal(obj)
729
716
 
730
- def typeName(self) -> str: # type: ignore[override]
731
- raise TypeError("StructField does not have typeName. " "Use typeName on its type explicitly instead.")
717
+ def typeName(self) -> str: # type: ignore[override] # noqa: D102
718
+ msg = "StructField does not have typeName. Use typeName on its type explicitly instead."
719
+ raise TypeError(msg)
732
720
 
733
721
 
734
722
  class StructType(DataType):
735
- """Struct type, consisting of a list of :class:`StructField`.
723
+ r"""Struct type, consisting of a list of :class:`StructField`.
736
724
 
737
725
  This is the data type representing a :class:`Row`.
738
726
 
739
727
  Iterating a :class:`StructType` will iterate over its :class:`StructField`\\s.
740
728
  A contained :class:`StructField` can be accessed by its name or position.
741
729
 
742
- Examples
730
+ Examples:
743
731
  --------
744
732
  >>> struct1 = StructType([StructField("f1", StringType(), True)])
745
733
  >>> struct1["f1"]
@@ -752,16 +740,17 @@ class StructType(DataType):
752
740
  >>> struct1 == struct2
753
741
  True
754
742
  >>> struct1 = StructType([StructField("f1", StringType(), True)])
755
- >>> struct2 = StructType([StructField("f1", StringType(), True),
756
- ... StructField("f2", IntegerType(), False)])
743
+ >>> struct2 = StructType(
744
+ ... [StructField("f1", StringType(), True), StructField("f2", IntegerType(), False)]
745
+ ... )
757
746
  >>> struct1 == struct2
758
747
  False
759
748
  """
760
749
 
761
- def _update_internal_duckdb_type(self):
750
+ def _update_internal_duckdb_type(self) -> None:
762
751
  self.duckdb_type = duckdb.struct_type(dict(zip(self.names, [x.duckdb_type for x in self.fields])))
763
752
 
764
- def __init__(self, fields: Optional[List[StructField]] = None):
753
+ def __init__(self, fields: Optional[list[StructField]] = None) -> None: # noqa: D107
765
754
  if not fields:
766
755
  self.fields = []
767
756
  self.names = []
@@ -780,23 +769,20 @@ class StructType(DataType):
780
769
  field: str,
781
770
  data_type: Union[str, DataType],
782
771
  nullable: bool = True,
783
- metadata: Optional[Dict[str, Any]] = None,
784
- ) -> "StructType":
785
- ...
772
+ metadata: Optional[dict[str, Any]] = None,
773
+ ) -> "StructType": ...
786
774
 
787
775
  @overload
788
- def add(self, field: StructField) -> "StructType":
789
- ...
776
+ def add(self, field: StructField) -> "StructType": ...
790
777
 
791
778
  def add(
792
779
  self,
793
780
  field: Union[str, StructField],
794
781
  data_type: Optional[Union[str, DataType]] = None,
795
782
  nullable: bool = True,
796
- metadata: Optional[Dict[str, Any]] = None,
783
+ metadata: Optional[dict[str, Any]] = None,
797
784
  ) -> "StructType":
798
- """
799
- Construct a :class:`StructType` by adding new elements to it, to define the schema.
785
+ r"""Construct a :class:`StructType` by adding new elements to it, to define the schema.
800
786
  The method accepts either:
801
787
 
802
788
  a) A single parameter which is a :class:`StructField` object.
@@ -815,11 +801,11 @@ class StructType(DataType):
815
801
  metadata : dict, optional
816
802
  Any additional metadata (default None)
817
803
 
818
- Returns
804
+ Returns:
819
805
  -------
820
806
  :class:`StructType`
821
807
 
822
- Examples
808
+ Examples:
823
809
  --------
824
810
  >>> struct1 = StructType().add("f1", StringType(), True).add("f2", StringType(), True, None)
825
811
  >>> struct2 = StructType([StructField("f1", StringType(), True), \\
@@ -834,13 +820,14 @@ class StructType(DataType):
834
820
  >>> struct2 = StructType([StructField("f1", StringType(), True)])
835
821
  >>> struct1 == struct2
836
822
  True
837
- """
823
+ """ # noqa: D205, D415
838
824
  if isinstance(field, StructField):
839
825
  self.fields.append(field)
840
826
  self.names.append(field.name)
841
827
  else:
842
828
  if isinstance(field, str) and data_type is None:
843
- raise ValueError("Must specify DataType if passing name of struct_field to create.")
829
+ msg = "Must specify DataType if passing name of struct_field to create."
830
+ raise ValueError(msg)
844
831
  else:
845
832
  data_type_f = data_type
846
833
  self.fields.append(StructField(field, data_type_f, nullable, metadata))
@@ -852,7 +839,7 @@ class StructType(DataType):
852
839
  return self
853
840
 
854
841
  def __iter__(self) -> Iterator[StructField]:
855
- """Iterate the fields"""
842
+ """Iterate the fields."""
856
843
  return iter(self.fields)
857
844
 
858
845
  def __len__(self) -> int:
@@ -865,24 +852,30 @@ class StructType(DataType):
865
852
  for field in self:
866
853
  if field.name == key:
867
854
  return field
868
- raise KeyError("No StructField named {0}".format(key))
855
+ msg = f"No StructField named {key}"
856
+ raise KeyError(msg)
869
857
  elif isinstance(key, int):
870
858
  try:
871
859
  return self.fields[key]
872
860
  except IndexError:
873
- raise IndexError("StructType index out of range")
861
+ msg = "StructType index out of range"
862
+ raise IndexError(msg) # noqa: B904
874
863
  elif isinstance(key, slice):
875
864
  return StructType(self.fields[key])
876
865
  else:
877
- raise TypeError("StructType keys should be strings, integers or slices")
866
+ msg = "StructType keys should be strings, integers or slices"
867
+ raise TypeError(msg)
878
868
 
879
- def simpleString(self) -> str:
880
- return "struct<%s>" % (",".join(f.simpleString() for f in self))
869
+ def simpleString(self) -> str: # noqa: D102
870
+ return "struct<{}>".format(",".join(f.simpleString() for f in self))
881
871
 
882
- def __repr__(self) -> str:
883
- return "StructType([%s])" % ", ".join(str(field) for field in self)
872
+ def __repr__(self) -> str: # noqa: D105
873
+ return "StructType([{}])".format(", ".join(str(field) for field in self))
884
874
 
885
- def extract_types_and_names(self) -> Tuple[List[str], List[str]]:
875
+ def __contains__(self, item: str) -> bool: # noqa: D105
876
+ return item in self.names
877
+
878
+ def extract_types_and_names(self) -> tuple[list[str], list[str]]: # noqa: D102
886
879
  names = []
887
880
  types = []
888
881
  for f in self.fields:
@@ -890,11 +883,10 @@ class StructType(DataType):
890
883
  names.append(f.name)
891
884
  return (types, names)
892
885
 
893
- def fieldNames(self) -> List[str]:
894
- """
895
- Returns all field names in a list.
886
+ def fieldNames(self) -> list[str]:
887
+ """Returns all field names in a list.
896
888
 
897
- Examples
889
+ Examples:
898
890
  --------
899
891
  >>> struct = StructType([StructField("f1", StringType(), True)])
900
892
  >>> struct.fieldNames()
@@ -902,11 +894,11 @@ class StructType(DataType):
902
894
  """
903
895
  return list(self.names)
904
896
 
905
- def needConversion(self) -> bool:
897
+ def needConversion(self) -> bool: # noqa: D102
906
898
  # We need convert Row()/namedtuple into tuple()
907
899
  return True
908
900
 
909
- def toInternal(self, obj: Tuple) -> Tuple:
901
+ def toInternal(self, obj: tuple) -> tuple: # noqa: D102
910
902
  if obj is None:
911
903
  return
912
904
 
@@ -926,7 +918,8 @@ class StructType(DataType):
926
918
  for n, f, c in zip(self.names, self.fields, self._needConversion)
927
919
  )
928
920
  else:
929
- raise ValueError("Unexpected tuple %r with StructType" % obj)
921
+ msg = f"Unexpected tuple {obj!r} with StructType"
922
+ raise ValueError(msg)
930
923
  else:
931
924
  if isinstance(obj, dict):
932
925
  return tuple(obj.get(n) for n in self.names)
@@ -936,16 +929,17 @@ class StructType(DataType):
936
929
  d = obj.__dict__
937
930
  return tuple(d.get(n) for n in self.names)
938
931
  else:
939
- raise ValueError("Unexpected tuple %r with StructType" % obj)
932
+ msg = f"Unexpected tuple {obj!r} with StructType"
933
+ raise ValueError(msg)
940
934
 
941
- def fromInternal(self, obj: Tuple) -> "Row":
935
+ def fromInternal(self, obj: tuple) -> "Row": # noqa: D102
942
936
  if obj is None:
943
937
  return
944
938
  if isinstance(obj, Row):
945
939
  # it's already converted by pickler
946
940
  return obj
947
941
 
948
- values: Union[Tuple, List]
942
+ values: Union[tuple, list]
949
943
  if self._needSerializeAnyField:
950
944
  # Only calling fromInternal function for fields that need conversion
951
945
  values = [f.fromInternal(v) if c else v for f, v, c in zip(self.fields, obj, self._needConversion)]
@@ -955,7 +949,7 @@ class StructType(DataType):
955
949
 
956
950
 
957
951
  class UnionType(DataType):
958
- def __init__(self):
952
+ def __init__(self) -> None:
959
953
  raise ContributionsAcceptedError
960
954
 
961
955
 
@@ -965,7 +959,7 @@ class UserDefinedType(DataType):
965
959
  .. note:: WARN: Spark Internal Use Only
966
960
  """
967
961
 
968
- def __init__(self):
962
+ def __init__(self) -> None:
969
963
  raise ContributionsAcceptedError
970
964
 
971
965
  @classmethod
@@ -974,24 +968,21 @@ class UserDefinedType(DataType):
974
968
 
975
969
  @classmethod
976
970
  def sqlType(cls) -> DataType:
977
- """
978
- Underlying SQL storage type for this UDT.
979
- """
980
- raise NotImplementedError("UDT must implement sqlType().")
971
+ """Underlying SQL storage type for this UDT."""
972
+ msg = "UDT must implement sqlType()."
973
+ raise NotImplementedError(msg)
981
974
 
982
975
  @classmethod
983
976
  def module(cls) -> str:
984
- """
985
- The Python module of the UDT.
986
- """
987
- raise NotImplementedError("UDT must implement module().")
977
+ """The Python module of the UDT."""
978
+ msg = "UDT must implement module()."
979
+ raise NotImplementedError(msg)
988
980
 
989
981
  @classmethod
990
982
  def scalaUDT(cls) -> str:
991
- """
992
- The class name of the paired Scala UDT (could be '', if there
983
+ """The class name of the paired Scala UDT (could be '', if there
993
984
  is no corresponding one).
994
- """
985
+ """ # noqa: D205
995
986
  return ""
996
987
 
997
988
  def needConversion(self) -> bool:
@@ -999,42 +990,38 @@ class UserDefinedType(DataType):
999
990
 
1000
991
  @classmethod
1001
992
  def _cachedSqlType(cls) -> DataType:
1002
- """
1003
- Cache the sqlType() into class, because it's heavily used in `toInternal`.
1004
- """
993
+ """Cache the sqlType() into class, because it's heavily used in `toInternal`."""
1005
994
  if not hasattr(cls, "_cached_sql_type"):
1006
995
  cls._cached_sql_type = cls.sqlType() # type: ignore[attr-defined]
1007
996
  return cls._cached_sql_type # type: ignore[attr-defined]
1008
997
 
1009
- def toInternal(self, obj: Any) -> Any:
998
+ def toInternal(self, obj: Any) -> Any: # noqa: ANN401
1010
999
  if obj is not None:
1011
1000
  return self._cachedSqlType().toInternal(self.serialize(obj))
1012
1001
 
1013
- def fromInternal(self, obj: Any) -> Any:
1002
+ def fromInternal(self, obj: Any) -> Any: # noqa: ANN401
1014
1003
  v = self._cachedSqlType().fromInternal(obj)
1015
1004
  if v is not None:
1016
1005
  return self.deserialize(v)
1017
1006
 
1018
- def serialize(self, obj: Any) -> Any:
1019
- """
1020
- Converts a user-type object into a SQL datum.
1021
- """
1022
- raise NotImplementedError("UDT must implement toInternal().")
1007
+ def serialize(self, obj: Any) -> NoReturn: # noqa: ANN401
1008
+ """Converts a user-type object into a SQL datum."""
1009
+ msg = "UDT must implement toInternal()."
1010
+ raise NotImplementedError(msg)
1023
1011
 
1024
- def deserialize(self, datum: Any) -> Any:
1025
- """
1026
- Converts a SQL datum into a user-type object.
1027
- """
1028
- raise NotImplementedError("UDT must implement fromInternal().")
1012
+ def deserialize(self, datum: Any) -> NoReturn: # noqa: ANN401
1013
+ """Converts a SQL datum into a user-type object."""
1014
+ msg = "UDT must implement fromInternal()."
1015
+ raise NotImplementedError(msg)
1029
1016
 
1030
1017
  def simpleString(self) -> str:
1031
1018
  return "udt"
1032
1019
 
1033
- def __eq__(self, other: Any) -> bool:
1034
- return type(self) == type(other)
1020
+ def __eq__(self, other: object) -> bool:
1021
+ return type(self) is type(other)
1035
1022
 
1036
1023
 
1037
- _atomic_types: List[Type[DataType]] = [
1024
+ _atomic_types: list[type[DataType]] = [
1038
1025
  StringType,
1039
1026
  BinaryType,
1040
1027
  BooleanType,
@@ -1050,33 +1037,28 @@ _atomic_types: List[Type[DataType]] = [
1050
1037
  TimestampNTZType,
1051
1038
  NullType,
1052
1039
  ]
1053
- _all_atomic_types: Dict[str, Type[DataType]] = dict((t.typeName(), t) for t in _atomic_types)
1040
+ _all_atomic_types: dict[str, type[DataType]] = {t.typeName(): t for t in _atomic_types}
1054
1041
 
1055
- _complex_types: List[Type[Union[ArrayType, MapType, StructType]]] = [
1042
+ _complex_types: list[type[Union[ArrayType, MapType, StructType]]] = [
1056
1043
  ArrayType,
1057
1044
  MapType,
1058
1045
  StructType,
1059
1046
  ]
1060
- _all_complex_types: Dict[str, Type[Union[ArrayType, MapType, StructType]]] = dict(
1061
- (v.typeName(), v) for v in _complex_types
1062
- )
1047
+ _all_complex_types: dict[str, type[Union[ArrayType, MapType, StructType]]] = {v.typeName(): v for v in _complex_types}
1063
1048
 
1064
- import re
1065
1049
 
1066
1050
  _FIXED_DECIMAL = re.compile(r"decimal\(\s*(\d+)\s*,\s*(-?\d+)\s*\)")
1067
1051
  _INTERVAL_DAYTIME = re.compile(r"interval (day|hour|minute|second)( to (day|hour|minute|second))?")
1068
1052
 
1069
1053
 
1070
- def _create_row(fields: Union["Row", List[str]], values: Union[Tuple[Any, ...], List[Any]]) -> "Row":
1054
+ def _create_row(fields: Union["Row", list[str]], values: Union[tuple[Any, ...], list[Any]]) -> "Row":
1071
1055
  row = Row(*values)
1072
1056
  row.__fields__ = fields
1073
1057
  return row
1074
1058
 
1075
1059
 
1076
1060
  class Row(tuple):
1077
-
1078
- """
1079
- A row in :class:`DataFrame`.
1061
+ """A row in :class:`DataFrame`.
1080
1062
  The fields in it can be accessed:
1081
1063
 
1082
1064
  * like attributes (``row.key``)
@@ -1093,18 +1075,18 @@ class Row(tuple):
1093
1075
  field names sorted alphabetically and will be ordered in the position as
1094
1076
  entered.
1095
1077
 
1096
- Examples
1078
+ Examples:
1097
1079
  --------
1098
1080
  >>> row = Row(name="Alice", age=11)
1099
1081
  >>> row
1100
1082
  Row(name='Alice', age=11)
1101
- >>> row['name'], row['age']
1083
+ >>> row["name"], row["age"]
1102
1084
  ('Alice', 11)
1103
1085
  >>> row.name, row.age
1104
1086
  ('Alice', 11)
1105
- >>> 'name' in row
1087
+ >>> "name" in row
1106
1088
  True
1107
- >>> 'wrong_key' in row
1089
+ >>> "wrong_key" in row
1108
1090
  False
1109
1091
 
1110
1092
  Row also can be used to create another Row like class, then it
@@ -1113,9 +1095,9 @@ class Row(tuple):
1113
1095
  >>> Person = Row("name", "age")
1114
1096
  >>> Person
1115
1097
  <Row('name', 'age')>
1116
- >>> 'name' in Person
1098
+ >>> "name" in Person
1117
1099
  True
1118
- >>> 'wrong_key' in Person
1100
+ >>> "wrong_key" in Person
1119
1101
  False
1120
1102
  >>> Person("Alice", 11)
1121
1103
  Row(name='Alice', age=11)
@@ -1127,19 +1109,18 @@ class Row(tuple):
1127
1109
  >>> row2 = Row(name="Alice", age=11)
1128
1110
  >>> row1 == row2
1129
1111
  True
1130
- """
1112
+ """ # noqa: D205, D415
1131
1113
 
1132
1114
  @overload
1133
- def __new__(cls, *args: str) -> "Row":
1134
- ...
1115
+ def __new__(cls, *args: str) -> "Row": ...
1135
1116
 
1136
1117
  @overload
1137
- def __new__(cls, **kwargs: Any) -> "Row":
1138
- ...
1118
+ def __new__(cls, **kwargs: Any) -> "Row": ... # noqa: ANN401
1139
1119
 
1140
- def __new__(cls, *args: Optional[str], **kwargs: Optional[Any]) -> "Row":
1120
+ def __new__(cls, *args: Optional[str], **kwargs: Optional[Any]) -> "Row": # noqa: D102
1141
1121
  if args and kwargs:
1142
- raise ValueError("Can not use both args " "and kwargs to create Row")
1122
+ msg = "Can not use both args and kwargs to create Row"
1123
+ raise ValueError(msg)
1143
1124
  if kwargs:
1144
1125
  # create row objects
1145
1126
  row = tuple.__new__(cls, list(kwargs.values()))
@@ -1149,16 +1130,15 @@ class Row(tuple):
1149
1130
  # create row class or objects
1150
1131
  return tuple.__new__(cls, args)
1151
1132
 
1152
- def asDict(self, recursive: bool = False) -> Dict[str, Any]:
1153
- """
1154
- Return as a dict
1133
+ def asDict(self, recursive: bool = False) -> dict[str, Any]:
1134
+ """Return as a dict.
1155
1135
 
1156
1136
  Parameters
1157
1137
  ----------
1158
1138
  recursive : bool, optional
1159
1139
  turns the nested Rows to dict (default: False).
1160
1140
 
1161
- Notes
1141
+ Notes:
1162
1142
  -----
1163
1143
  If a row contains duplicate field names, e.g., the rows of a join
1164
1144
  between two :class:`DataFrame` that both have the fields of same names,
@@ -1166,28 +1146,29 @@ class Row(tuple):
1166
1146
  will also return one of the duplicate fields, however returned value might
1167
1147
  be different to ``asDict``.
1168
1148
 
1169
- Examples
1149
+ Examples:
1170
1150
  --------
1171
- >>> Row(name="Alice", age=11).asDict() == {'name': 'Alice', 'age': 11}
1151
+ >>> Row(name="Alice", age=11).asDict() == {"name": "Alice", "age": 11}
1172
1152
  True
1173
- >>> row = Row(key=1, value=Row(name='a', age=2))
1174
- >>> row.asDict() == {'key': 1, 'value': Row(name='a', age=2)}
1153
+ >>> row = Row(key=1, value=Row(name="a", age=2))
1154
+ >>> row.asDict() == {"key": 1, "value": Row(name="a", age=2)}
1175
1155
  True
1176
- >>> row.asDict(True) == {'key': 1, 'value': {'name': 'a', 'age': 2}}
1156
+ >>> row.asDict(True) == {"key": 1, "value": {"name": "a", "age": 2}}
1177
1157
  True
1178
1158
  """
1179
1159
  if not hasattr(self, "__fields__"):
1180
- raise TypeError("Cannot convert a Row class into dict")
1160
+ msg = "Cannot convert a Row class into dict"
1161
+ raise TypeError(msg)
1181
1162
 
1182
1163
  if recursive:
1183
1164
 
1184
- def conv(obj: Any) -> Any:
1165
+ def conv(obj: Union[Row, list, dict, object]) -> Union[list, dict, object]:
1185
1166
  if isinstance(obj, Row):
1186
1167
  return obj.asDict(True)
1187
1168
  elif isinstance(obj, list):
1188
1169
  return [conv(o) for o in obj]
1189
1170
  elif isinstance(obj, dict):
1190
- return dict((k, conv(v)) for k, v in obj.items())
1171
+ return {k: conv(v) for k, v in obj.items()}
1191
1172
  else:
1192
1173
  return obj
1193
1174
 
@@ -1195,35 +1176,34 @@ class Row(tuple):
1195
1176
  else:
1196
1177
  return dict(zip(self.__fields__, self))
1197
1178
 
1198
- def __contains__(self, item: Any) -> bool:
1179
+ def __contains__(self, item: Any) -> bool: # noqa: D105, ANN401
1199
1180
  if hasattr(self, "__fields__"):
1200
1181
  return item in self.__fields__
1201
1182
  else:
1202
- return super(Row, self).__contains__(item)
1183
+ return super().__contains__(item)
1203
1184
 
1204
1185
  # let object acts like class
1205
- def __call__(self, *args: Any) -> "Row":
1206
- """create new Row object"""
1186
+ def __call__(self, *args: Any) -> "Row": # noqa: ANN401
1187
+ """Create new Row object."""
1207
1188
  if len(args) > len(self):
1208
- raise ValueError(
1209
- "Can not create Row with fields %s, expected %d values " "but got %s" % (self, len(self), args)
1210
- )
1189
+ msg = f"Can not create Row with fields {self}, expected {len(self):d} values but got {args}"
1190
+ raise ValueError(msg)
1211
1191
  return _create_row(self, args)
1212
1192
 
1213
- def __getitem__(self, item: Any) -> Any:
1193
+ def __getitem__(self, item: Any) -> Any: # noqa: D105, ANN401
1214
1194
  if isinstance(item, (int, slice)):
1215
- return super(Row, self).__getitem__(item)
1195
+ return super().__getitem__(item)
1216
1196
  try:
1217
1197
  # it will be slow when it has many fields,
1218
1198
  # but this will not be used in normal cases
1219
1199
  idx = self.__fields__.index(item)
1220
- return super(Row, self).__getitem__(idx)
1200
+ return super().__getitem__(idx)
1221
1201
  except IndexError:
1222
- raise KeyError(item)
1202
+ raise KeyError(item) # noqa: B904
1223
1203
  except ValueError:
1224
- raise ValueError(item)
1204
+ raise ValueError(item) # noqa: B904
1225
1205
 
1226
- def __getattr__(self, item: str) -> Any:
1206
+ def __getattr__(self, item: str) -> Any: # noqa: D105, ANN401
1227
1207
  if item.startswith("__"):
1228
1208
  raise AttributeError(item)
1229
1209
  try:
@@ -1232,18 +1212,19 @@ class Row(tuple):
1232
1212
  idx = self.__fields__.index(item)
1233
1213
  return self[idx]
1234
1214
  except IndexError:
1235
- raise AttributeError(item)
1215
+ raise AttributeError(item) # noqa: B904
1236
1216
  except ValueError:
1237
- raise AttributeError(item)
1217
+ raise AttributeError(item) # noqa: B904
1238
1218
 
1239
- def __setattr__(self, key: Any, value: Any) -> None:
1219
+ def __setattr__(self, key: Any, value: Any) -> None: # noqa: D105, ANN401
1240
1220
  if key != "__fields__":
1241
- raise RuntimeError("Row is read-only")
1221
+ msg = "Row is read-only"
1222
+ raise RuntimeError(msg)
1242
1223
  self.__dict__[key] = value
1243
1224
 
1244
1225
  def __reduce__(
1245
1226
  self,
1246
- ) -> Union[str, Tuple[Any, ...]]:
1227
+ ) -> Union[str, tuple[Any, ...]]:
1247
1228
  """Returns a tuple so Python knows how to pickle Row."""
1248
1229
  if hasattr(self, "__fields__"):
1249
1230
  return (_create_row, (self.__fields__, tuple(self)))
@@ -1253,6 +1234,6 @@ class Row(tuple):
1253
1234
  def __repr__(self) -> str:
1254
1235
  """Printable representation of Row used in Python REPL."""
1255
1236
  if hasattr(self, "__fields__"):
1256
- return "Row(%s)" % ", ".join("%s=%r" % (k, v) for k, v in zip(self.__fields__, tuple(self)))
1237
+ return "Row({})".format(", ".join(f"{k}={v!r}" for k, v in zip(self.__fields__, tuple(self))))
1257
1238
  else:
1258
- return "<Row(%s)>" % ", ".join("%r" % field for field in self)
1239
+ return "<Row({})>".format(", ".join(f"{field!r}" for field in self))