duckdb 1.5.0.dev37__cp312-cp312-win_amd64.whl → 1.5.0.dev94__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of duckdb might be problematic. Click here for more details.

Files changed (56)
  1. _duckdb-stubs/__init__.pyi +1443 -0
  2. _duckdb-stubs/_func.pyi +46 -0
  3. _duckdb-stubs/_sqltypes.pyi +75 -0
  4. _duckdb.cp312-win_amd64.pyd +0 -0
  5. adbc_driver_duckdb/__init__.py +49 -0
  6. adbc_driver_duckdb/dbapi.py +115 -0
  7. duckdb/__init__.py +341 -435
  8. duckdb/_dbapi_type_object.py +231 -0
  9. duckdb/_version.py +22 -0
  10. duckdb/bytes_io_wrapper.py +12 -9
  11. duckdb/experimental/__init__.py +2 -1
  12. duckdb/experimental/spark/__init__.py +3 -4
  13. duckdb/experimental/spark/_globals.py +8 -8
  14. duckdb/experimental/spark/_typing.py +7 -9
  15. duckdb/experimental/spark/conf.py +16 -15
  16. duckdb/experimental/spark/context.py +60 -44
  17. duckdb/experimental/spark/errors/__init__.py +33 -35
  18. duckdb/experimental/spark/errors/error_classes.py +1 -1
  19. duckdb/experimental/spark/errors/exceptions/__init__.py +1 -1
  20. duckdb/experimental/spark/errors/exceptions/base.py +39 -88
  21. duckdb/experimental/spark/errors/utils.py +11 -16
  22. duckdb/experimental/spark/exception.py +9 -6
  23. duckdb/experimental/spark/sql/__init__.py +5 -5
  24. duckdb/experimental/spark/sql/_typing.py +8 -15
  25. duckdb/experimental/spark/sql/catalog.py +21 -20
  26. duckdb/experimental/spark/sql/column.py +48 -55
  27. duckdb/experimental/spark/sql/conf.py +9 -8
  28. duckdb/experimental/spark/sql/dataframe.py +185 -233
  29. duckdb/experimental/spark/sql/functions.py +1222 -1248
  30. duckdb/experimental/spark/sql/group.py +56 -52
  31. duckdb/experimental/spark/sql/readwriter.py +80 -94
  32. duckdb/experimental/spark/sql/session.py +64 -59
  33. duckdb/experimental/spark/sql/streaming.py +9 -10
  34. duckdb/experimental/spark/sql/type_utils.py +67 -65
  35. duckdb/experimental/spark/sql/types.py +309 -345
  36. duckdb/experimental/spark/sql/udf.py +6 -6
  37. duckdb/filesystem.py +26 -16
  38. duckdb/func/__init__.py +3 -0
  39. duckdb/functional/__init__.py +12 -16
  40. duckdb/polars_io.py +130 -83
  41. duckdb/query_graph/__main__.py +91 -96
  42. duckdb/sqltypes/__init__.py +63 -0
  43. duckdb/typing/__init__.py +18 -8
  44. duckdb/udf.py +10 -5
  45. duckdb/value/__init__.py +1 -0
  46. duckdb/value/constant/__init__.py +62 -60
  47. {duckdb-1.5.0.dev37.dist-info → duckdb-1.5.0.dev94.dist-info}/METADATA +12 -4
  48. duckdb-1.5.0.dev94.dist-info/RECORD +52 -0
  49. duckdb/__init__.pyi +0 -713
  50. duckdb/functional/__init__.pyi +0 -31
  51. duckdb/typing/__init__.pyi +0 -36
  52. duckdb/value/constant/__init__.pyi +0 -115
  53. duckdb-1.5.0.dev37.dist-info/RECORD +0 -47
  54. /duckdb/{value/__init__.pyi → py.typed} +0 -0
  55. {duckdb-1.5.0.dev37.dist-info → duckdb-1.5.0.dev94.dist-info}/WHEEL +0 -0
  56. {duckdb-1.5.0.dev37.dist-info → duckdb-1.5.0.dev94.dist-info}/licenses/LICENSE +0 -0
@@ -1,28 +1,28 @@
1
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'spark' folder.
1
+ # ruff: noqa: D100
2
+ # This code is based on code from Apache Spark under the license found in the LICENSE
3
+ # file located in the 'spark' folder.
2
4
 
5
+ import calendar
6
+ import datetime
7
+ import math
8
+ import re
9
+ import time
10
+ from builtins import tuple
11
+ from collections.abc import Iterator, Mapping
12
+ from types import MappingProxyType
3
13
  from typing import (
4
- cast,
5
- overload,
6
- Dict,
7
- Optional,
8
- List,
9
- Tuple,
10
14
  Any,
11
- Union,
12
- Type,
13
- TypeVar,
14
15
  ClassVar,
15
- Iterator,
16
+ NoReturn,
17
+ Optional,
18
+ TypeVar,
19
+ Union,
20
+ cast,
21
+ overload,
16
22
  )
17
- from builtins import tuple
18
- import datetime
19
- import calendar
20
- import time
21
- import math
22
- import re
23
23
 
24
24
  import duckdb
25
- from duckdb.typing import DuckDBPyType
25
+ from duckdb.sqltypes import DuckDBPyType
26
26
 
27
27
  from ..exception import ContributionsAcceptedError
28
28
 
@@ -30,105 +30,100 @@ T = TypeVar("T")
30
30
  U = TypeVar("U")
31
31
 
32
32
  __all__ = [
33
- "DataType",
34
- "NullType",
35
- "StringType",
33
+ "ArrayType",
36
34
  "BinaryType",
37
- "UUIDType",
38
35
  "BitstringType",
39
36
  "BooleanType",
37
+ "ByteType",
38
+ "DataType",
40
39
  "DateType",
41
- "TimestampType",
42
- "TimestampNTZType",
43
- "TimestampNanosecondNTZType",
44
- "TimestampMilisecondNTZType",
45
- "TimestampSecondNTZType",
46
- "TimeType",
47
- "TimeNTZType",
40
+ "DayTimeIntervalType",
48
41
  "DecimalType",
49
42
  "DoubleType",
50
43
  "FloatType",
51
- "ByteType",
52
- "UnsignedByteType",
53
- "ShortType",
54
- "UnsignedShortType",
44
+ "HugeIntegerType",
55
45
  "IntegerType",
56
- "UnsignedIntegerType",
57
46
  "LongType",
58
- "UnsignedLongType",
59
- "HugeIntegerType",
60
- "UnsignedHugeIntegerType",
61
- "DayTimeIntervalType",
62
- "Row",
63
- "ArrayType",
64
47
  "MapType",
48
+ "NullType",
49
+ "Row",
50
+ "ShortType",
51
+ "StringType",
65
52
  "StructField",
66
53
  "StructType",
54
+ "TimeNTZType",
55
+ "TimeType",
56
+ "TimestampMilisecondNTZType",
57
+ "TimestampNTZType",
58
+ "TimestampNanosecondNTZType",
59
+ "TimestampSecondNTZType",
60
+ "TimestampType",
61
+ "UUIDType",
62
+ "UnsignedByteType",
63
+ "UnsignedHugeIntegerType",
64
+ "UnsignedIntegerType",
65
+ "UnsignedLongType",
66
+ "UnsignedShortType",
67
67
  ]
68
68
 
69
69
 
70
70
  class DataType:
71
71
  """Base class for data types."""
72
72
 
73
- def __init__(self, duckdb_type):
73
+ def __init__(self, duckdb_type: DuckDBPyType) -> None: # noqa: D107
74
74
  self.duckdb_type = duckdb_type
75
75
 
76
- def __repr__(self) -> str:
76
+ def __repr__(self) -> str: # noqa: D105
77
77
  return self.__class__.__name__ + "()"
78
78
 
79
- def __hash__(self) -> int:
79
+ def __hash__(self) -> int: # noqa: D105
80
80
  return hash(str(self))
81
81
 
82
- def __eq__(self, other: Any) -> bool:
82
+ def __eq__(self, other: object) -> bool: # noqa: D105
83
83
  return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
84
84
 
85
- def __ne__(self, other: Any) -> bool:
85
+ def __ne__(self, other: object) -> bool: # noqa: D105
86
86
  return not self.__eq__(other)
87
87
 
88
88
  @classmethod
89
- def typeName(cls) -> str:
89
+ def typeName(cls) -> str: # noqa: D102
90
90
  return cls.__name__[:-4].lower()
91
91
 
92
- def simpleString(self) -> str:
92
+ def simpleString(self) -> str: # noqa: D102
93
93
  return self.typeName()
94
94
 
95
- def jsonValue(self) -> Union[str, Dict[str, Any]]:
95
+ def jsonValue(self) -> Union[str, dict[str, Any]]: # noqa: D102
96
96
  raise ContributionsAcceptedError
97
97
 
98
- def json(self) -> str:
98
+ def json(self) -> str: # noqa: D102
99
99
  raise ContributionsAcceptedError
100
100
 
101
101
  def needConversion(self) -> bool:
102
- """
103
- Does this type needs conversion between Python object and internal SQL object.
102
+ """Does this type needs conversion between Python object and internal SQL object.
104
103
 
105
104
  This is used to avoid the unnecessary conversion for ArrayType/MapType/StructType.
106
105
  """
107
106
  return False
108
107
 
109
- def toInternal(self, obj: Any) -> Any:
110
- """
111
- Converts a Python object into an internal SQL object.
112
- """
108
+ def toInternal(self, obj: Any) -> Any: # noqa: ANN401
109
+ """Converts a Python object into an internal SQL object."""
113
110
  return obj
114
111
 
115
- def fromInternal(self, obj: Any) -> Any:
116
- """
117
- Converts an internal SQL object into a native Python object.
118
- """
112
+ def fromInternal(self, obj: Any) -> Any: # noqa: ANN401
113
+ """Converts an internal SQL object into a native Python object."""
119
114
  return obj
120
115
 
121
116
 
122
117
  # This singleton pattern does not work with pickle, you will get
123
118
  # another object after pickle and unpickle
124
119
  class DataTypeSingleton(type):
125
- """Metaclass for DataType"""
120
+ """Metaclass for DataType."""
126
121
 
127
- _instances: ClassVar[Dict[Type["DataTypeSingleton"], "DataTypeSingleton"]] = {}
122
+ _instances: ClassVar[dict[type["DataTypeSingleton"], "DataTypeSingleton"]] = {}
128
123
 
129
- def __call__(cls: Type[T]) -> T: # type: ignore[override]
124
+ def __call__(cls: type[T]) -> T: # type: ignore[override]
130
125
  if cls not in cls._instances: # type: ignore[attr-defined]
131
- cls._instances[cls] = super(DataTypeSingleton, cls).__call__() # type: ignore[misc, attr-defined]
126
+ cls._instances[cls] = super().__call__() # type: ignore[misc, attr-defined]
132
127
  return cls._instances[cls] # type: ignore[attr-defined]
133
128
 
134
129
 
@@ -138,17 +133,18 @@ class NullType(DataType, metaclass=DataTypeSingleton):
138
133
  The data type representing None, used for the types that cannot be inferred.
139
134
  """
140
135
 
141
- def __init__(self):
136
+ def __init__(self) -> None: # noqa: D107
142
137
  super().__init__(DuckDBPyType("NULL"))
143
138
 
144
139
  @classmethod
145
- def typeName(cls) -> str:
140
+ def typeName(cls) -> str: # noqa: D102
146
141
  return "void"
147
142
 
148
143
 
149
144
  class AtomicType(DataType):
150
145
  """An internal type used to represent everything that is not
151
- null, UDTs, arrays, structs, and maps."""
146
+ null, UDTs, arrays, structs, and maps.
147
+ """ # noqa: D205
152
148
 
153
149
 
154
150
  class NumericType(AtomicType):
@@ -166,54 +162,54 @@ class FractionalType(NumericType):
166
162
  class StringType(AtomicType, metaclass=DataTypeSingleton):
167
163
  """String data type."""
168
164
 
169
- def __init__(self):
165
+ def __init__(self) -> None: # noqa: D107
170
166
  super().__init__(DuckDBPyType("VARCHAR"))
171
167
 
172
168
 
173
169
  class BitstringType(AtomicType, metaclass=DataTypeSingleton):
174
170
  """Bitstring data type."""
175
171
 
176
- def __init__(self):
172
+ def __init__(self) -> None: # noqa: D107
177
173
  super().__init__(DuckDBPyType("BIT"))
178
174
 
179
175
 
180
176
  class UUIDType(AtomicType, metaclass=DataTypeSingleton):
181
177
  """UUID data type."""
182
178
 
183
- def __init__(self):
179
+ def __init__(self) -> None: # noqa: D107
184
180
  super().__init__(DuckDBPyType("UUID"))
185
181
 
186
182
 
187
183
  class BinaryType(AtomicType, metaclass=DataTypeSingleton):
188
184
  """Binary (byte array) data type."""
189
185
 
190
- def __init__(self):
186
+ def __init__(self) -> None: # noqa: D107
191
187
  super().__init__(DuckDBPyType("BLOB"))
192
188
 
193
189
 
194
190
  class BooleanType(AtomicType, metaclass=DataTypeSingleton):
195
191
  """Boolean data type."""
196
192
 
197
- def __init__(self):
193
+ def __init__(self) -> None: # noqa: D107
198
194
  super().__init__(DuckDBPyType("BOOLEAN"))
199
195
 
200
196
 
201
197
  class DateType(AtomicType, metaclass=DataTypeSingleton):
202
198
  """Date (datetime.date) data type."""
203
199
 
204
- def __init__(self):
200
+ def __init__(self) -> None: # noqa: D107
205
201
  super().__init__(DuckDBPyType("DATE"))
206
202
 
207
203
  EPOCH_ORDINAL = datetime.datetime(1970, 1, 1).toordinal()
208
204
 
209
- def needConversion(self) -> bool:
205
+ def needConversion(self) -> bool: # noqa: D102
210
206
  return True
211
207
 
212
- def toInternal(self, d: datetime.date) -> int:
208
+ def toInternal(self, d: datetime.date) -> int: # noqa: D102
213
209
  if d is not None:
214
210
  return d.toordinal() - self.EPOCH_ORDINAL
215
211
 
216
- def fromInternal(self, v: int) -> datetime.date:
212
+ def fromInternal(self, v: int) -> datetime.date: # noqa: D102
217
213
  if v is not None:
218
214
  return datetime.date.fromordinal(v + self.EPOCH_ORDINAL)
219
215
 
@@ -221,22 +217,22 @@ class DateType(AtomicType, metaclass=DataTypeSingleton):
221
217
  class TimestampType(AtomicType, metaclass=DataTypeSingleton):
222
218
  """Timestamp (datetime.datetime) data type."""
223
219
 
224
- def __init__(self):
220
+ def __init__(self) -> None: # noqa: D107
225
221
  super().__init__(DuckDBPyType("TIMESTAMPTZ"))
226
222
 
227
223
  @classmethod
228
- def typeName(cls) -> str:
224
+ def typeName(cls) -> str: # noqa: D102
229
225
  return "timestamptz"
230
226
 
231
- def needConversion(self) -> bool:
227
+ def needConversion(self) -> bool: # noqa: D102
232
228
  return True
233
229
 
234
- def toInternal(self, dt: datetime.datetime) -> int:
230
+ def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
235
231
  if dt is not None:
236
232
  seconds = calendar.timegm(dt.utctimetuple()) if dt.tzinfo else time.mktime(dt.timetuple())
237
233
  return int(seconds) * 1000000 + dt.microsecond
238
234
 
239
- def fromInternal(self, ts: int) -> datetime.datetime:
235
+ def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
240
236
  if ts is not None:
241
237
  # using int to avoid precision loss in float
242
238
  return datetime.datetime.fromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000)
@@ -245,22 +241,22 @@ class TimestampType(AtomicType, metaclass=DataTypeSingleton):
245
241
  class TimestampNTZType(AtomicType, metaclass=DataTypeSingleton):
246
242
  """Timestamp (datetime.datetime) data type without timezone information with microsecond precision."""
247
243
 
248
- def __init__(self):
244
+ def __init__(self) -> None: # noqa: D107
249
245
  super().__init__(DuckDBPyType("TIMESTAMP"))
250
246
 
251
- def needConversion(self) -> bool:
247
+ def needConversion(self) -> bool: # noqa: D102
252
248
  return True
253
249
 
254
250
  @classmethod
255
- def typeName(cls) -> str:
251
+ def typeName(cls) -> str: # noqa: D102
256
252
  return "timestamp"
257
253
 
258
- def toInternal(self, dt: datetime.datetime) -> int:
254
+ def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
259
255
  if dt is not None:
260
256
  seconds = calendar.timegm(dt.timetuple())
261
257
  return int(seconds) * 1000000 + dt.microsecond
262
258
 
263
- def fromInternal(self, ts: int) -> datetime.datetime:
259
+ def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
264
260
  if ts is not None:
265
261
  # using int to avoid precision loss in float
266
262
  return datetime.datetime.utcfromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000)
@@ -269,60 +265,60 @@ class TimestampNTZType(AtomicType, metaclass=DataTypeSingleton):
269
265
  class TimestampSecondNTZType(AtomicType, metaclass=DataTypeSingleton):
270
266
  """Timestamp (datetime.datetime) data type without timezone information with second precision."""
271
267
 
272
- def __init__(self):
268
+ def __init__(self) -> None: # noqa: D107
273
269
  super().__init__(DuckDBPyType("TIMESTAMP_S"))
274
270
 
275
- def needConversion(self) -> bool:
271
+ def needConversion(self) -> bool: # noqa: D102
276
272
  return True
277
273
 
278
274
  @classmethod
279
- def typeName(cls) -> str:
275
+ def typeName(cls) -> str: # noqa: D102
280
276
  return "timestamp_s"
281
277
 
282
- def toInternal(self, dt: datetime.datetime) -> int:
278
+ def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
283
279
  raise ContributionsAcceptedError
284
280
 
285
- def fromInternal(self, ts: int) -> datetime.datetime:
281
+ def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
286
282
  raise ContributionsAcceptedError
287
283
 
288
284
 
289
285
  class TimestampMilisecondNTZType(AtomicType, metaclass=DataTypeSingleton):
290
286
  """Timestamp (datetime.datetime) data type without timezone information with milisecond precision."""
291
287
 
292
- def __init__(self):
288
+ def __init__(self) -> None: # noqa: D107
293
289
  super().__init__(DuckDBPyType("TIMESTAMP_MS"))
294
290
 
295
- def needConversion(self) -> bool:
291
+ def needConversion(self) -> bool: # noqa: D102
296
292
  return True
297
293
 
298
294
  @classmethod
299
- def typeName(cls) -> str:
295
+ def typeName(cls) -> str: # noqa: D102
300
296
  return "timestamp_ms"
301
297
 
302
- def toInternal(self, dt: datetime.datetime) -> int:
298
+ def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
303
299
  raise ContributionsAcceptedError
304
300
 
305
- def fromInternal(self, ts: int) -> datetime.datetime:
301
+ def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
306
302
  raise ContributionsAcceptedError
307
303
 
308
304
 
309
305
  class TimestampNanosecondNTZType(AtomicType, metaclass=DataTypeSingleton):
310
306
  """Timestamp (datetime.datetime) data type without timezone information with nanosecond precision."""
311
307
 
312
- def __init__(self):
308
+ def __init__(self) -> None: # noqa: D107
313
309
  super().__init__(DuckDBPyType("TIMESTAMP_NS"))
314
310
 
315
- def needConversion(self) -> bool:
311
+ def needConversion(self) -> bool: # noqa: D102
316
312
  return True
317
313
 
318
314
  @classmethod
319
- def typeName(cls) -> str:
315
+ def typeName(cls) -> str: # noqa: D102
320
316
  return "timestamp_ns"
321
317
 
322
- def toInternal(self, dt: datetime.datetime) -> int:
318
+ def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
323
319
  raise ContributionsAcceptedError
324
320
 
325
- def fromInternal(self, ts: int) -> datetime.datetime:
321
+ def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
326
322
  raise ContributionsAcceptedError
327
323
 
328
324
 
@@ -346,90 +342,90 @@ class DecimalType(FractionalType):
346
342
  the number of digits on right side of dot. (default: 0)
347
343
  """
348
344
 
349
- def __init__(self, precision: int = 10, scale: int = 0):
345
+ def __init__(self, precision: int = 10, scale: int = 0) -> None: # noqa: D107
350
346
  super().__init__(duckdb.decimal_type(precision, scale))
351
347
  self.precision = precision
352
348
  self.scale = scale
353
349
  self.hasPrecisionInfo = True # this is a public API
354
350
 
355
- def simpleString(self) -> str:
356
- return "decimal(%d,%d)" % (self.precision, self.scale)
351
+ def simpleString(self) -> str: # noqa: D102
352
+ return f"decimal({int(self.precision):d},{int(self.scale):d})"
357
353
 
358
- def __repr__(self) -> str:
359
- return "DecimalType(%d,%d)" % (self.precision, self.scale)
354
+ def __repr__(self) -> str: # noqa: D105
355
+ return f"DecimalType({int(self.precision):d},{int(self.scale):d})"
360
356
 
361
357
 
362
358
  class DoubleType(FractionalType, metaclass=DataTypeSingleton):
363
359
  """Double data type, representing double precision floats."""
364
360
 
365
- def __init__(self):
361
+ def __init__(self) -> None: # noqa: D107
366
362
  super().__init__(DuckDBPyType("DOUBLE"))
367
363
 
368
364
 
369
365
  class FloatType(FractionalType, metaclass=DataTypeSingleton):
370
366
  """Float data type, representing single precision floats."""
371
367
 
372
- def __init__(self):
368
+ def __init__(self) -> None: # noqa: D107
373
369
  super().__init__(DuckDBPyType("FLOAT"))
374
370
 
375
371
 
376
372
  class ByteType(IntegralType):
377
373
  """Byte data type, i.e. a signed integer in a single byte."""
378
374
 
379
- def __init__(self):
375
+ def __init__(self) -> None: # noqa: D107
380
376
  super().__init__(DuckDBPyType("TINYINT"))
381
377
 
382
- def simpleString(self) -> str:
378
+ def simpleString(self) -> str: # noqa: D102
383
379
  return "tinyint"
384
380
 
385
381
 
386
382
  class UnsignedByteType(IntegralType):
387
383
  """Unsigned byte data type, i.e. a unsigned integer in a single byte."""
388
384
 
389
- def __init__(self):
385
+ def __init__(self) -> None: # noqa: D107
390
386
  super().__init__(DuckDBPyType("UTINYINT"))
391
387
 
392
- def simpleString(self) -> str:
388
+ def simpleString(self) -> str: # noqa: D102
393
389
  return "utinyint"
394
390
 
395
391
 
396
392
  class ShortType(IntegralType):
397
393
  """Short data type, i.e. a signed 16-bit integer."""
398
394
 
399
- def __init__(self):
395
+ def __init__(self) -> None: # noqa: D107
400
396
  super().__init__(DuckDBPyType("SMALLINT"))
401
397
 
402
- def simpleString(self) -> str:
398
+ def simpleString(self) -> str: # noqa: D102
403
399
  return "smallint"
404
400
 
405
401
 
406
402
  class UnsignedShortType(IntegralType):
407
403
  """Unsigned short data type, i.e. a unsigned 16-bit integer."""
408
404
 
409
- def __init__(self):
405
+ def __init__(self) -> None: # noqa: D107
410
406
  super().__init__(DuckDBPyType("USMALLINT"))
411
407
 
412
- def simpleString(self) -> str:
408
+ def simpleString(self) -> str: # noqa: D102
413
409
  return "usmallint"
414
410
 
415
411
 
416
412
  class IntegerType(IntegralType):
417
413
  """Int data type, i.e. a signed 32-bit integer."""
418
414
 
419
- def __init__(self):
415
+ def __init__(self) -> None: # noqa: D107
420
416
  super().__init__(DuckDBPyType("INTEGER"))
421
417
 
422
- def simpleString(self) -> str:
418
+ def simpleString(self) -> str: # noqa: D102
423
419
  return "integer"
424
420
 
425
421
 
426
422
  class UnsignedIntegerType(IntegralType):
427
423
  """Unsigned int data type, i.e. a unsigned 32-bit integer."""
428
424
 
429
- def __init__(self):
425
+ def __init__(self) -> None: # noqa: D107
430
426
  super().__init__(DuckDBPyType("UINTEGER"))
431
427
 
432
- def simpleString(self) -> str:
428
+ def simpleString(self) -> str: # noqa: D102
433
429
  return "uinteger"
434
430
 
435
431
 
@@ -440,10 +436,10 @@ class LongType(IntegralType):
440
436
  please use :class:`DecimalType`.
441
437
  """
442
438
 
443
- def __init__(self):
439
+ def __init__(self) -> None: # noqa: D107
444
440
  super().__init__(DuckDBPyType("BIGINT"))
445
441
 
446
- def simpleString(self) -> str:
442
+ def simpleString(self) -> str: # noqa: D102
447
443
  return "bigint"
448
444
 
449
445
 
@@ -454,24 +450,24 @@ class UnsignedLongType(IntegralType):
454
450
  please use :class:`HugeIntegerType`.
455
451
  """
456
452
 
457
- def __init__(self):
453
+ def __init__(self) -> None: # noqa: D107
458
454
  super().__init__(DuckDBPyType("UBIGINT"))
459
455
 
460
- def simpleString(self) -> str:
456
+ def simpleString(self) -> str: # noqa: D102
461
457
  return "ubigint"
462
458
 
463
459
 
464
460
  class HugeIntegerType(IntegralType):
465
461
  """Huge integer data type, i.e. a signed 128-bit integer.
466
462
 
467
- If the values are beyond the range of [-170141183460469231731687303715884105728, 170141183460469231731687303715884105727],
468
- please use :class:`DecimalType`.
463
+ If the values are beyond the range of [-170141183460469231731687303715884105728,
464
+ 170141183460469231731687303715884105727], please use :class:`DecimalType`.
469
465
  """
470
466
 
471
- def __init__(self):
467
+ def __init__(self) -> None: # noqa: D107
472
468
  super().__init__(DuckDBPyType("HUGEINT"))
473
469
 
474
- def simpleString(self) -> str:
470
+ def simpleString(self) -> str: # noqa: D102
475
471
  return "hugeint"
476
472
 
477
473
 
@@ -482,30 +478,30 @@ class UnsignedHugeIntegerType(IntegralType):
482
478
  please use :class:`DecimalType`.
483
479
  """
484
480
 
485
- def __init__(self):
481
+ def __init__(self) -> None: # noqa: D107
486
482
  super().__init__(DuckDBPyType("UHUGEINT"))
487
483
 
488
- def simpleString(self) -> str:
484
+ def simpleString(self) -> str: # noqa: D102
489
485
  return "uhugeint"
490
486
 
491
487
 
492
488
  class TimeType(IntegralType):
493
489
  """Time (datetime.time) data type."""
494
490
 
495
- def __init__(self):
491
+ def __init__(self) -> None: # noqa: D107
496
492
  super().__init__(DuckDBPyType("TIMETZ"))
497
493
 
498
- def simpleString(self) -> str:
494
+ def simpleString(self) -> str: # noqa: D102
499
495
  return "timetz"
500
496
 
501
497
 
502
498
  class TimeNTZType(IntegralType):
503
499
  """Time (datetime.time) data type without timezone information."""
504
500
 
505
- def __init__(self):
501
+ def __init__(self) -> None: # noqa: D107
506
502
  super().__init__(DuckDBPyType("TIME"))
507
503
 
508
- def simpleString(self) -> str:
504
+ def simpleString(self) -> str: # noqa: D102
509
505
  return "time"
510
506
 
511
507
 
@@ -517,16 +513,18 @@ class DayTimeIntervalType(AtomicType):
517
513
  MINUTE = 2
518
514
  SECOND = 3
519
515
 
520
- _fields = {
521
- DAY: "day",
522
- HOUR: "hour",
523
- MINUTE: "minute",
524
- SECOND: "second",
525
- }
516
+ _fields: Mapping[str, int] = MappingProxyType(
517
+ {
518
+ DAY: "day",
519
+ HOUR: "hour",
520
+ MINUTE: "minute",
521
+ SECOND: "second",
522
+ }
523
+ )
526
524
 
527
- _inverted_fields = dict(zip(_fields.values(), _fields.keys()))
525
+ _inverted_fields: Mapping[int, str] = MappingProxyType(dict(zip(_fields.values(), _fields.keys())))
528
526
 
529
- def __init__(self, startField: Optional[int] = None, endField: Optional[int] = None):
527
+ def __init__(self, startField: Optional[int] = None, endField: Optional[int] = None) -> None: # noqa: D107
530
528
  super().__init__(DuckDBPyType("INTERVAL"))
531
529
  if startField is None and endField is None:
532
530
  # Default matched to scala side.
@@ -536,33 +534,34 @@ class DayTimeIntervalType(AtomicType):
536
534
  endField = startField
537
535
 
538
536
  fields = DayTimeIntervalType._fields
539
- if startField not in fields.keys() or endField not in fields.keys():
540
- raise RuntimeError("interval %s to %s is invalid" % (startField, endField))
541
- self.startField = cast(int, startField)
542
- self.endField = cast(int, endField)
537
+ if startField not in fields or endField not in fields:
538
+ msg = f"interval {startField} to {endField} is invalid"
539
+ raise RuntimeError(msg)
540
+ self.startField = cast("int", startField)
541
+ self.endField = cast("int", endField)
543
542
 
544
543
  def _str_repr(self) -> str:
545
544
  fields = DayTimeIntervalType._fields
546
545
  start_field_name = fields[self.startField]
547
546
  end_field_name = fields[self.endField]
548
547
  if start_field_name == end_field_name:
549
- return "interval %s" % start_field_name
548
+ return f"interval {start_field_name}"
550
549
  else:
551
- return "interval %s to %s" % (start_field_name, end_field_name)
550
+ return f"interval {start_field_name} to {end_field_name}"
552
551
 
553
552
  simpleString = _str_repr
554
553
 
555
- def __repr__(self) -> str:
556
- return "%s(%d, %d)" % (type(self).__name__, self.startField, self.endField)
554
+ def __repr__(self) -> str: # noqa: D105
555
+ return f"{type(self).__name__}({int(self.startField):d}, {int(self.endField):d})"
557
556
 
558
- def needConversion(self) -> bool:
557
+ def needConversion(self) -> bool: # noqa: D102
559
558
  return True
560
559
 
561
- def toInternal(self, dt: datetime.timedelta) -> Optional[int]:
560
+ def toInternal(self, dt: datetime.timedelta) -> Optional[int]: # noqa: D102
562
561
  if dt is not None:
563
562
  return (math.floor(dt.total_seconds()) * 1000000) + dt.microseconds
564
563
 
565
- def fromInternal(self, micros: int) -> Optional[datetime.timedelta]:
564
+ def fromInternal(self, micros: int) -> Optional[datetime.timedelta]: # noqa: D102
566
565
  if micros is not None:
567
566
  return datetime.timedelta(microseconds=micros)
568
567
 
@@ -577,7 +576,7 @@ class ArrayType(DataType):
577
576
  containsNull : bool, optional
578
577
  whether the array can contain null (None) values.
579
578
 
580
- Examples
579
+ Examples:
581
580
  --------
582
581
  >>> ArrayType(StringType()) == ArrayType(StringType(), True)
583
582
  True
@@ -585,30 +584,27 @@ class ArrayType(DataType):
585
584
  False
586
585
  """
587
586
 
588
- def __init__(self, elementType: DataType, containsNull: bool = True):
587
+ def __init__(self, elementType: DataType, containsNull: bool = True) -> None: # noqa: D107
589
588
  super().__init__(duckdb.list_type(elementType.duckdb_type))
590
- assert isinstance(elementType, DataType), "elementType %s should be an instance of %s" % (
591
- elementType,
592
- DataType,
593
- )
589
+ assert isinstance(elementType, DataType), f"elementType {elementType} should be an instance of {DataType}"
594
590
  self.elementType = elementType
595
591
  self.containsNull = containsNull
596
592
 
597
- def simpleString(self) -> str:
598
- return "array<%s>" % self.elementType.simpleString()
593
+ def simpleString(self) -> str: # noqa: D102
594
+ return f"array<{self.elementType.simpleString()}>"
599
595
 
600
- def __repr__(self) -> str:
601
- return "ArrayType(%s, %s)" % (self.elementType, str(self.containsNull))
596
+ def __repr__(self) -> str: # noqa: D105
597
+ return f"ArrayType({self.elementType}, {self.containsNull!s})"
602
598
 
603
- def needConversion(self) -> bool:
599
+ def needConversion(self) -> bool: # noqa: D102
604
600
  return self.elementType.needConversion()
605
601
 
606
- def toInternal(self, obj: List[Optional[T]]) -> List[Optional[T]]:
602
+ def toInternal(self, obj: list[Optional[T]]) -> list[Optional[T]]: # noqa: D102
607
603
  if not self.needConversion():
608
604
  return obj
609
605
  return obj and [self.elementType.toInternal(v) for v in obj]
610
606
 
611
- def fromInternal(self, obj: List[Optional[T]]) -> List[Optional[T]]:
607
+ def fromInternal(self, obj: list[Optional[T]]) -> list[Optional[T]]: # noqa: D102
612
608
  if not self.needConversion():
613
609
  return obj
614
610
  return obj and [self.elementType.fromInternal(v) for v in obj]
@@ -626,59 +622,44 @@ class MapType(DataType):
626
622
  valueContainsNull : bool, optional
627
623
  indicates whether values can contain null (None) values.
628
624
 
629
- Notes
625
+ Notes:
630
626
  -----
631
627
  Keys in a map data type are not allowed to be null (None).
632
628
 
633
- Examples
629
+ Examples:
634
630
  --------
635
- >>> (MapType(StringType(), IntegerType())
636
- ... == MapType(StringType(), IntegerType(), True))
631
+ >>> (MapType(StringType(), IntegerType()) == MapType(StringType(), IntegerType(), True))
637
632
  True
638
- >>> (MapType(StringType(), IntegerType(), False)
639
- ... == MapType(StringType(), FloatType()))
633
+ >>> (MapType(StringType(), IntegerType(), False) == MapType(StringType(), FloatType()))
640
634
  False
641
635
  """
642
636
 
643
- def __init__(self, keyType: DataType, valueType: DataType, valueContainsNull: bool = True):
637
+ def __init__(self, keyType: DataType, valueType: DataType, valueContainsNull: bool = True) -> None: # noqa: D107
644
638
  super().__init__(duckdb.map_type(keyType.duckdb_type, valueType.duckdb_type))
645
- assert isinstance(keyType, DataType), "keyType %s should be an instance of %s" % (
646
- keyType,
647
- DataType,
648
- )
649
- assert isinstance(valueType, DataType), "valueType %s should be an instance of %s" % (
650
- valueType,
651
- DataType,
652
- )
639
+ assert isinstance(keyType, DataType), f"keyType {keyType} should be an instance of {DataType}"
640
+ assert isinstance(valueType, DataType), f"valueType {valueType} should be an instance of {DataType}"
653
641
  self.keyType = keyType
654
642
  self.valueType = valueType
655
643
  self.valueContainsNull = valueContainsNull
656
644
 
657
- def simpleString(self) -> str:
658
- return "map<%s,%s>" % (
659
- self.keyType.simpleString(),
660
- self.valueType.simpleString(),
661
- )
645
+ def simpleString(self) -> str: # noqa: D102
646
+ return f"map<{self.keyType.simpleString()},{self.valueType.simpleString()}>"
662
647
 
663
- def __repr__(self) -> str:
664
- return "MapType(%s, %s, %s)" % (
665
- self.keyType,
666
- self.valueType,
667
- str(self.valueContainsNull),
668
- )
648
+ def __repr__(self) -> str: # noqa: D105
649
+ return f"MapType({self.keyType}, {self.valueType}, {self.valueContainsNull!s})"
669
650
 
670
- def needConversion(self) -> bool:
651
+ def needConversion(self) -> bool: # noqa: D102
671
652
  return self.keyType.needConversion() or self.valueType.needConversion()
672
653
 
673
- def toInternal(self, obj: Dict[T, Optional[U]]) -> Dict[T, Optional[U]]:
654
+ def toInternal(self, obj: dict[T, Optional[U]]) -> dict[T, Optional[U]]: # noqa: D102
674
655
  if not self.needConversion():
675
656
  return obj
676
- return obj and dict((self.keyType.toInternal(k), self.valueType.toInternal(v)) for k, v in obj.items())
657
+ return obj and {self.keyType.toInternal(k): self.valueType.toInternal(v) for k, v in obj.items()}
677
658
 
678
- def fromInternal(self, obj: Dict[T, Optional[U]]) -> Dict[T, Optional[U]]:
659
+ def fromInternal(self, obj: dict[T, Optional[U]]) -> dict[T, Optional[U]]: # noqa: D102
679
660
  if not self.needConversion():
680
661
  return obj
681
- return obj and dict((self.keyType.fromInternal(k), self.valueType.fromInternal(v)) for k, v in obj.items())
662
+ return obj and {self.keyType.fromInternal(k): self.valueType.fromInternal(v) for k, v in obj.items()}
682
663
 
683
664
 
684
665
  class StructField(DataType):
@@ -695,66 +676,58 @@ class StructField(DataType):
695
676
  metadata : dict, optional
696
677
  a dict from string to simple type that can be toInternald to JSON automatically
697
678
 
698
- Examples
679
+ Examples:
699
680
  --------
700
- >>> (StructField("f1", StringType(), True)
701
- ... == StructField("f1", StringType(), True))
681
+ >>> (StructField("f1", StringType(), True) == StructField("f1", StringType(), True))
702
682
  True
703
- >>> (StructField("f1", StringType(), True)
704
- ... == StructField("f2", StringType(), True))
683
+ >>> (StructField("f1", StringType(), True) == StructField("f2", StringType(), True))
705
684
  False
706
685
  """
707
686
 
708
- def __init__(
687
+ def __init__( # noqa: D107
709
688
  self,
710
689
  name: str,
711
690
  dataType: DataType,
712
691
  nullable: bool = True,
713
- metadata: Optional[Dict[str, Any]] = None,
714
- ):
692
+ metadata: Optional[dict[str, Any]] = None,
693
+ ) -> None:
715
694
  super().__init__(dataType.duckdb_type)
716
- assert isinstance(dataType, DataType), "dataType %s should be an instance of %s" % (
717
- dataType,
718
- DataType,
719
- )
720
- assert isinstance(name, str), "field name %s should be a string" % (name)
695
+ assert isinstance(dataType, DataType), f"dataType {dataType} should be an instance of {DataType}"
696
+ assert isinstance(name, str), f"field name {name} should be a string"
721
697
  self.name = name
722
698
  self.dataType = dataType
723
699
  self.nullable = nullable
724
700
  self.metadata = metadata or {}
725
701
 
726
- def simpleString(self) -> str:
727
- return "%s:%s" % (self.name, self.dataType.simpleString())
702
+ def simpleString(self) -> str: # noqa: D102
703
+ return f"{self.name}:{self.dataType.simpleString()}"
728
704
 
729
- def __repr__(self) -> str:
730
- return "StructField('%s', %s, %s)" % (
731
- self.name,
732
- self.dataType,
733
- str(self.nullable),
734
- )
705
+ def __repr__(self) -> str: # noqa: D105
706
+ return f"StructField('{self.name}', {self.dataType}, {self.nullable!s})"
735
707
 
736
- def needConversion(self) -> bool:
708
+ def needConversion(self) -> bool: # noqa: D102
737
709
  return self.dataType.needConversion()
738
710
 
739
- def toInternal(self, obj: T) -> T:
711
+ def toInternal(self, obj: T) -> T: # noqa: D102
740
712
  return self.dataType.toInternal(obj)
741
713
 
742
- def fromInternal(self, obj: T) -> T:
714
+ def fromInternal(self, obj: T) -> T: # noqa: D102
743
715
  return self.dataType.fromInternal(obj)
744
716
 
745
- def typeName(self) -> str: # type: ignore[override]
746
- raise TypeError("StructField does not have typeName. " "Use typeName on its type explicitly instead.")
717
+ def typeName(self) -> str: # type: ignore[override] # noqa: D102
718
+ msg = "StructField does not have typeName. Use typeName on its type explicitly instead."
719
+ raise TypeError(msg)
747
720
 
748
721
 
749
722
  class StructType(DataType):
750
- """Struct type, consisting of a list of :class:`StructField`.
723
+ r"""Struct type, consisting of a list of :class:`StructField`.
751
724
 
752
725
  This is the data type representing a :class:`Row`.
753
726
 
754
727
  Iterating a :class:`StructType` will iterate over its :class:`StructField`\\s.
755
728
  A contained :class:`StructField` can be accessed by its name or position.
756
729
 
757
- Examples
730
+ Examples:
758
731
  --------
759
732
  >>> struct1 = StructType([StructField("f1", StringType(), True)])
760
733
  >>> struct1["f1"]
@@ -767,16 +740,17 @@ class StructType(DataType):
767
740
  >>> struct1 == struct2
768
741
  True
769
742
  >>> struct1 = StructType([StructField("f1", StringType(), True)])
770
- >>> struct2 = StructType([StructField("f1", StringType(), True),
771
- ... StructField("f2", IntegerType(), False)])
743
+ >>> struct2 = StructType(
744
+ ... [StructField("f1", StringType(), True), StructField("f2", IntegerType(), False)]
745
+ ... )
772
746
  >>> struct1 == struct2
773
747
  False
774
748
  """
775
749
 
776
- def _update_internal_duckdb_type(self):
750
+ def _update_internal_duckdb_type(self) -> None:
777
751
  self.duckdb_type = duckdb.struct_type(dict(zip(self.names, [x.duckdb_type for x in self.fields])))
778
752
 
779
- def __init__(self, fields: Optional[List[StructField]] = None):
753
+ def __init__(self, fields: Optional[list[StructField]] = None) -> None: # noqa: D107
780
754
  if not fields:
781
755
  self.fields = []
782
756
  self.names = []
@@ -795,23 +769,20 @@ class StructType(DataType):
795
769
  field: str,
796
770
  data_type: Union[str, DataType],
797
771
  nullable: bool = True,
798
- metadata: Optional[Dict[str, Any]] = None,
799
- ) -> "StructType":
800
- ...
772
+ metadata: Optional[dict[str, Any]] = None,
773
+ ) -> "StructType": ...
801
774
 
802
775
  @overload
803
- def add(self, field: StructField) -> "StructType":
804
- ...
776
+ def add(self, field: StructField) -> "StructType": ...
805
777
 
806
778
  def add(
807
779
  self,
808
780
  field: Union[str, StructField],
809
781
  data_type: Optional[Union[str, DataType]] = None,
810
782
  nullable: bool = True,
811
- metadata: Optional[Dict[str, Any]] = None,
783
+ metadata: Optional[dict[str, Any]] = None,
812
784
  ) -> "StructType":
813
- """
814
- Construct a :class:`StructType` by adding new elements to it, to define the schema.
785
+ r"""Construct a :class:`StructType` by adding new elements to it, to define the schema.
815
786
  The method accepts either:
816
787
 
817
788
  a) A single parameter which is a :class:`StructField` object.
@@ -830,11 +801,11 @@ class StructType(DataType):
830
801
  metadata : dict, optional
831
802
  Any additional metadata (default None)
832
803
 
833
- Returns
804
+ Returns:
834
805
  -------
835
806
  :class:`StructType`
836
807
 
837
- Examples
808
+ Examples:
838
809
  --------
839
810
  >>> struct1 = StructType().add("f1", StringType(), True).add("f2", StringType(), True, None)
840
811
  >>> struct2 = StructType([StructField("f1", StringType(), True), \\
@@ -849,13 +820,14 @@ class StructType(DataType):
849
820
  >>> struct2 = StructType([StructField("f1", StringType(), True)])
850
821
  >>> struct1 == struct2
851
822
  True
852
- """
823
+ """ # noqa: D205, D415
853
824
  if isinstance(field, StructField):
854
825
  self.fields.append(field)
855
826
  self.names.append(field.name)
856
827
  else:
857
828
  if isinstance(field, str) and data_type is None:
858
- raise ValueError("Must specify DataType if passing name of struct_field to create.")
829
+ msg = "Must specify DataType if passing name of struct_field to create."
830
+ raise ValueError(msg)
859
831
  else:
860
832
  data_type_f = data_type
861
833
  self.fields.append(StructField(field, data_type_f, nullable, metadata))
@@ -867,7 +839,7 @@ class StructType(DataType):
867
839
  return self
868
840
 
869
841
  def __iter__(self) -> Iterator[StructField]:
870
- """Iterate the fields"""
842
+ """Iterate the fields."""
871
843
  return iter(self.fields)
872
844
 
873
845
  def __len__(self) -> int:
@@ -880,27 +852,30 @@ class StructType(DataType):
880
852
  for field in self:
881
853
  if field.name == key:
882
854
  return field
883
- raise KeyError("No StructField named {0}".format(key))
855
+ msg = f"No StructField named {key}"
856
+ raise KeyError(msg)
884
857
  elif isinstance(key, int):
885
858
  try:
886
859
  return self.fields[key]
887
860
  except IndexError:
888
- raise IndexError("StructType index out of range")
861
+ msg = "StructType index out of range"
862
+ raise IndexError(msg) # noqa: B904
889
863
  elif isinstance(key, slice):
890
864
  return StructType(self.fields[key])
891
865
  else:
892
- raise TypeError("StructType keys should be strings, integers or slices")
866
+ msg = "StructType keys should be strings, integers or slices"
867
+ raise TypeError(msg)
893
868
 
894
- def simpleString(self) -> str:
895
- return "struct<%s>" % (",".join(f.simpleString() for f in self))
869
+ def simpleString(self) -> str: # noqa: D102
870
+ return "struct<{}>".format(",".join(f.simpleString() for f in self))
896
871
 
897
- def __repr__(self) -> str:
898
- return "StructType([%s])" % ", ".join(str(field) for field in self)
872
+ def __repr__(self) -> str: # noqa: D105
873
+ return "StructType([{}])".format(", ".join(str(field) for field in self))
899
874
 
900
- def __contains__(self, item: Any) -> bool:
875
+ def __contains__(self, item: str) -> bool: # noqa: D105
901
876
  return item in self.names
902
877
 
903
- def extract_types_and_names(self) -> Tuple[List[str], List[str]]:
878
+ def extract_types_and_names(self) -> tuple[list[str], list[str]]: # noqa: D102
904
879
  names = []
905
880
  types = []
906
881
  for f in self.fields:
@@ -908,11 +883,10 @@ class StructType(DataType):
908
883
  names.append(f.name)
909
884
  return (types, names)
910
885
 
911
- def fieldNames(self) -> List[str]:
912
- """
913
- Returns all field names in a list.
886
+ def fieldNames(self) -> list[str]:
887
+ """Returns all field names in a list.
914
888
 
915
- Examples
889
+ Examples:
916
890
  --------
917
891
  >>> struct = StructType([StructField("f1", StringType(), True)])
918
892
  >>> struct.fieldNames()
@@ -920,11 +894,11 @@ class StructType(DataType):
920
894
  """
921
895
  return list(self.names)
922
896
 
923
- def needConversion(self) -> bool:
897
+ def needConversion(self) -> bool: # noqa: D102
924
898
  # We need convert Row()/namedtuple into tuple()
925
899
  return True
926
900
 
927
- def toInternal(self, obj: Tuple) -> Tuple:
901
+ def toInternal(self, obj: tuple) -> tuple: # noqa: D102
928
902
  if obj is None:
929
903
  return
930
904
 
@@ -944,7 +918,8 @@ class StructType(DataType):
944
918
  for n, f, c in zip(self.names, self.fields, self._needConversion)
945
919
  )
946
920
  else:
947
- raise ValueError("Unexpected tuple %r with StructType" % obj)
921
+ msg = f"Unexpected tuple {obj!r} with StructType"
922
+ raise ValueError(msg)
948
923
  else:
949
924
  if isinstance(obj, dict):
950
925
  return tuple(obj.get(n) for n in self.names)
@@ -954,16 +929,17 @@ class StructType(DataType):
954
929
  d = obj.__dict__
955
930
  return tuple(d.get(n) for n in self.names)
956
931
  else:
957
- raise ValueError("Unexpected tuple %r with StructType" % obj)
932
+ msg = f"Unexpected tuple {obj!r} with StructType"
933
+ raise ValueError(msg)
958
934
 
959
- def fromInternal(self, obj: Tuple) -> "Row":
935
+ def fromInternal(self, obj: tuple) -> "Row": # noqa: D102
960
936
  if obj is None:
961
937
  return
962
938
  if isinstance(obj, Row):
963
939
  # it's already converted by pickler
964
940
  return obj
965
941
 
966
- values: Union[Tuple, List]
942
+ values: Union[tuple, list]
967
943
  if self._needSerializeAnyField:
968
944
  # Only calling fromInternal function for fields that need conversion
969
945
  values = [f.fromInternal(v) if c else v for f, v, c in zip(self.fields, obj, self._needConversion)]
@@ -973,7 +949,7 @@ class StructType(DataType):
973
949
 
974
950
 
975
951
  class UnionType(DataType):
976
- def __init__(self):
952
+ def __init__(self) -> None:
977
953
  raise ContributionsAcceptedError
978
954
 
979
955
 
@@ -983,7 +959,7 @@ class UserDefinedType(DataType):
983
959
  .. note:: WARN: Spark Internal Use Only
984
960
  """
985
961
 
986
- def __init__(self):
962
+ def __init__(self) -> None:
987
963
  raise ContributionsAcceptedError
988
964
 
989
965
  @classmethod
@@ -992,24 +968,21 @@ class UserDefinedType(DataType):
992
968
 
993
969
  @classmethod
994
970
  def sqlType(cls) -> DataType:
995
- """
996
- Underlying SQL storage type for this UDT.
997
- """
998
- raise NotImplementedError("UDT must implement sqlType().")
971
+ """Underlying SQL storage type for this UDT."""
972
+ msg = "UDT must implement sqlType()."
973
+ raise NotImplementedError(msg)
999
974
 
1000
975
  @classmethod
1001
976
  def module(cls) -> str:
1002
- """
1003
- The Python module of the UDT.
1004
- """
1005
- raise NotImplementedError("UDT must implement module().")
977
+ """The Python module of the UDT."""
978
+ msg = "UDT must implement module()."
979
+ raise NotImplementedError(msg)
1006
980
 
1007
981
  @classmethod
1008
982
  def scalaUDT(cls) -> str:
1009
- """
1010
- The class name of the paired Scala UDT (could be '', if there
983
+ """The class name of the paired Scala UDT (could be '', if there
1011
984
  is no corresponding one).
1012
- """
985
+ """ # noqa: D205
1013
986
  return ""
1014
987
 
1015
988
  def needConversion(self) -> bool:
@@ -1017,42 +990,38 @@ class UserDefinedType(DataType):
1017
990
 
1018
991
  @classmethod
1019
992
  def _cachedSqlType(cls) -> DataType:
1020
- """
1021
- Cache the sqlType() into class, because it's heavily used in `toInternal`.
1022
- """
993
+ """Cache the sqlType() into class, because it's heavily used in `toInternal`."""
1023
994
  if not hasattr(cls, "_cached_sql_type"):
1024
995
  cls._cached_sql_type = cls.sqlType() # type: ignore[attr-defined]
1025
996
  return cls._cached_sql_type # type: ignore[attr-defined]
1026
997
 
1027
- def toInternal(self, obj: Any) -> Any:
998
+ def toInternal(self, obj: Any) -> Any: # noqa: ANN401
1028
999
  if obj is not None:
1029
1000
  return self._cachedSqlType().toInternal(self.serialize(obj))
1030
1001
 
1031
- def fromInternal(self, obj: Any) -> Any:
1002
+ def fromInternal(self, obj: Any) -> Any: # noqa: ANN401
1032
1003
  v = self._cachedSqlType().fromInternal(obj)
1033
1004
  if v is not None:
1034
1005
  return self.deserialize(v)
1035
1006
 
1036
- def serialize(self, obj: Any) -> Any:
1037
- """
1038
- Converts a user-type object into a SQL datum.
1039
- """
1040
- raise NotImplementedError("UDT must implement toInternal().")
1007
+ def serialize(self, obj: Any) -> NoReturn: # noqa: ANN401
1008
+ """Converts a user-type object into a SQL datum."""
1009
+ msg = "UDT must implement toInternal()."
1010
+ raise NotImplementedError(msg)
1041
1011
 
1042
- def deserialize(self, datum: Any) -> Any:
1043
- """
1044
- Converts a SQL datum into a user-type object.
1045
- """
1046
- raise NotImplementedError("UDT must implement fromInternal().")
1012
+ def deserialize(self, datum: Any) -> NoReturn: # noqa: ANN401
1013
+ """Converts a SQL datum into a user-type object."""
1014
+ msg = "UDT must implement fromInternal()."
1015
+ raise NotImplementedError(msg)
1047
1016
 
1048
1017
  def simpleString(self) -> str:
1049
1018
  return "udt"
1050
1019
 
1051
- def __eq__(self, other: Any) -> bool:
1052
- return type(self) == type(other)
1020
+ def __eq__(self, other: object) -> bool:
1021
+ return type(self) is type(other)
1053
1022
 
1054
1023
 
1055
- _atomic_types: List[Type[DataType]] = [
1024
+ _atomic_types: list[type[DataType]] = [
1056
1025
  StringType,
1057
1026
  BinaryType,
1058
1027
  BooleanType,
@@ -1068,32 +1037,28 @@ _atomic_types: List[Type[DataType]] = [
1068
1037
  TimestampNTZType,
1069
1038
  NullType,
1070
1039
  ]
1071
- _all_atomic_types: Dict[str, Type[DataType]] = dict((t.typeName(), t) for t in _atomic_types)
1040
+ _all_atomic_types: dict[str, type[DataType]] = {t.typeName(): t for t in _atomic_types}
1072
1041
 
1073
- _complex_types: List[Type[Union[ArrayType, MapType, StructType]]] = [
1042
+ _complex_types: list[type[Union[ArrayType, MapType, StructType]]] = [
1074
1043
  ArrayType,
1075
1044
  MapType,
1076
1045
  StructType,
1077
1046
  ]
1078
- _all_complex_types: Dict[str, Type[Union[ArrayType, MapType, StructType]]] = dict(
1079
- (v.typeName(), v) for v in _complex_types
1080
- )
1047
+ _all_complex_types: dict[str, type[Union[ArrayType, MapType, StructType]]] = {v.typeName(): v for v in _complex_types}
1081
1048
 
1082
1049
 
1083
1050
  _FIXED_DECIMAL = re.compile(r"decimal\(\s*(\d+)\s*,\s*(-?\d+)\s*\)")
1084
1051
  _INTERVAL_DAYTIME = re.compile(r"interval (day|hour|minute|second)( to (day|hour|minute|second))?")
1085
1052
 
1086
1053
 
1087
- def _create_row(fields: Union["Row", List[str]], values: Union[Tuple[Any, ...], List[Any]]) -> "Row":
1054
+ def _create_row(fields: Union["Row", list[str]], values: Union[tuple[Any, ...], list[Any]]) -> "Row":
1088
1055
  row = Row(*values)
1089
1056
  row.__fields__ = fields
1090
1057
  return row
1091
1058
 
1092
1059
 
1093
1060
  class Row(tuple):
1094
-
1095
- """
1096
- A row in :class:`DataFrame`.
1061
+ """A row in :class:`DataFrame`.
1097
1062
  The fields in it can be accessed:
1098
1063
 
1099
1064
  * like attributes (``row.key``)
@@ -1110,18 +1075,18 @@ class Row(tuple):
1110
1075
  field names sorted alphabetically and will be ordered in the position as
1111
1076
  entered.
1112
1077
 
1113
- Examples
1078
+ Examples:
1114
1079
  --------
1115
1080
  >>> row = Row(name="Alice", age=11)
1116
1081
  >>> row
1117
1082
  Row(name='Alice', age=11)
1118
- >>> row['name'], row['age']
1083
+ >>> row["name"], row["age"]
1119
1084
  ('Alice', 11)
1120
1085
  >>> row.name, row.age
1121
1086
  ('Alice', 11)
1122
- >>> 'name' in row
1087
+ >>> "name" in row
1123
1088
  True
1124
- >>> 'wrong_key' in row
1089
+ >>> "wrong_key" in row
1125
1090
  False
1126
1091
 
1127
1092
  Row also can be used to create another Row like class, then it
@@ -1130,9 +1095,9 @@ class Row(tuple):
1130
1095
  >>> Person = Row("name", "age")
1131
1096
  >>> Person
1132
1097
  <Row('name', 'age')>
1133
- >>> 'name' in Person
1098
+ >>> "name" in Person
1134
1099
  True
1135
- >>> 'wrong_key' in Person
1100
+ >>> "wrong_key" in Person
1136
1101
  False
1137
1102
  >>> Person("Alice", 11)
1138
1103
  Row(name='Alice', age=11)
@@ -1144,19 +1109,18 @@ class Row(tuple):
1144
1109
  >>> row2 = Row(name="Alice", age=11)
1145
1110
  >>> row1 == row2
1146
1111
  True
1147
- """
1112
+ """ # noqa: D205, D415
1148
1113
 
1149
1114
  @overload
1150
- def __new__(cls, *args: str) -> "Row":
1151
- ...
1115
+ def __new__(cls, *args: str) -> "Row": ...
1152
1116
 
1153
1117
  @overload
1154
- def __new__(cls, **kwargs: Any) -> "Row":
1155
- ...
1118
+ def __new__(cls, **kwargs: Any) -> "Row": ... # noqa: ANN401
1156
1119
 
1157
- def __new__(cls, *args: Optional[str], **kwargs: Optional[Any]) -> "Row":
1120
+ def __new__(cls, *args: Optional[str], **kwargs: Optional[Any]) -> "Row": # noqa: D102
1158
1121
  if args and kwargs:
1159
- raise ValueError("Can not use both args " "and kwargs to create Row")
1122
+ msg = "Can not use both args and kwargs to create Row"
1123
+ raise ValueError(msg)
1160
1124
  if kwargs:
1161
1125
  # create row objects
1162
1126
  row = tuple.__new__(cls, list(kwargs.values()))
@@ -1166,16 +1130,15 @@ class Row(tuple):
1166
1130
  # create row class or objects
1167
1131
  return tuple.__new__(cls, args)
1168
1132
 
1169
- def asDict(self, recursive: bool = False) -> Dict[str, Any]:
1170
- """
1171
- Return as a dict
1133
+ def asDict(self, recursive: bool = False) -> dict[str, Any]:
1134
+ """Return as a dict.
1172
1135
 
1173
1136
  Parameters
1174
1137
  ----------
1175
1138
  recursive : bool, optional
1176
1139
  turns the nested Rows to dict (default: False).
1177
1140
 
1178
- Notes
1141
+ Notes:
1179
1142
  -----
1180
1143
  If a row contains duplicate field names, e.g., the rows of a join
1181
1144
  between two :class:`DataFrame` that both have the fields of same names,
@@ -1183,28 +1146,29 @@ class Row(tuple):
1183
1146
  will also return one of the duplicate fields, however returned value might
1184
1147
  be different to ``asDict``.
1185
1148
 
1186
- Examples
1149
+ Examples:
1187
1150
  --------
1188
- >>> Row(name="Alice", age=11).asDict() == {'name': 'Alice', 'age': 11}
1151
+ >>> Row(name="Alice", age=11).asDict() == {"name": "Alice", "age": 11}
1189
1152
  True
1190
- >>> row = Row(key=1, value=Row(name='a', age=2))
1191
- >>> row.asDict() == {'key': 1, 'value': Row(name='a', age=2)}
1153
+ >>> row = Row(key=1, value=Row(name="a", age=2))
1154
+ >>> row.asDict() == {"key": 1, "value": Row(name="a", age=2)}
1192
1155
  True
1193
- >>> row.asDict(True) == {'key': 1, 'value': {'name': 'a', 'age': 2}}
1156
+ >>> row.asDict(True) == {"key": 1, "value": {"name": "a", "age": 2}}
1194
1157
  True
1195
1158
  """
1196
1159
  if not hasattr(self, "__fields__"):
1197
- raise TypeError("Cannot convert a Row class into dict")
1160
+ msg = "Cannot convert a Row class into dict"
1161
+ raise TypeError(msg)
1198
1162
 
1199
1163
  if recursive:
1200
1164
 
1201
- def conv(obj: Any) -> Any:
1165
+ def conv(obj: Union[Row, list, dict, object]) -> Union[list, dict, object]:
1202
1166
  if isinstance(obj, Row):
1203
1167
  return obj.asDict(True)
1204
1168
  elif isinstance(obj, list):
1205
1169
  return [conv(o) for o in obj]
1206
1170
  elif isinstance(obj, dict):
1207
- return dict((k, conv(v)) for k, v in obj.items())
1171
+ return {k: conv(v) for k, v in obj.items()}
1208
1172
  else:
1209
1173
  return obj
1210
1174
 
@@ -1212,35 +1176,34 @@ class Row(tuple):
1212
1176
  else:
1213
1177
  return dict(zip(self.__fields__, self))
1214
1178
 
1215
- def __contains__(self, item: Any) -> bool:
1179
+ def __contains__(self, item: Any) -> bool: # noqa: D105, ANN401
1216
1180
  if hasattr(self, "__fields__"):
1217
1181
  return item in self.__fields__
1218
1182
  else:
1219
- return super(Row, self).__contains__(item)
1183
+ return super().__contains__(item)
1220
1184
 
1221
1185
  # let object acts like class
1222
- def __call__(self, *args: Any) -> "Row":
1223
- """create new Row object"""
1186
+ def __call__(self, *args: Any) -> "Row": # noqa: ANN401
1187
+ """Create new Row object."""
1224
1188
  if len(args) > len(self):
1225
- raise ValueError(
1226
- "Can not create Row with fields %s, expected %d values " "but got %s" % (self, len(self), args)
1227
- )
1189
+ msg = f"Can not create Row with fields {self}, expected {len(self):d} values but got {args}"
1190
+ raise ValueError(msg)
1228
1191
  return _create_row(self, args)
1229
1192
 
1230
- def __getitem__(self, item: Any) -> Any:
1193
+ def __getitem__(self, item: Any) -> Any: # noqa: D105, ANN401
1231
1194
  if isinstance(item, (int, slice)):
1232
- return super(Row, self).__getitem__(item)
1195
+ return super().__getitem__(item)
1233
1196
  try:
1234
1197
  # it will be slow when it has many fields,
1235
1198
  # but this will not be used in normal cases
1236
1199
  idx = self.__fields__.index(item)
1237
- return super(Row, self).__getitem__(idx)
1200
+ return super().__getitem__(idx)
1238
1201
  except IndexError:
1239
- raise KeyError(item)
1202
+ raise KeyError(item) # noqa: B904
1240
1203
  except ValueError:
1241
- raise ValueError(item)
1204
+ raise ValueError(item) # noqa: B904
1242
1205
 
1243
- def __getattr__(self, item: str) -> Any:
1206
+ def __getattr__(self, item: str) -> Any: # noqa: D105, ANN401
1244
1207
  if item.startswith("__"):
1245
1208
  raise AttributeError(item)
1246
1209
  try:
@@ -1249,18 +1212,19 @@ class Row(tuple):
1249
1212
  idx = self.__fields__.index(item)
1250
1213
  return self[idx]
1251
1214
  except IndexError:
1252
- raise AttributeError(item)
1215
+ raise AttributeError(item) # noqa: B904
1253
1216
  except ValueError:
1254
- raise AttributeError(item)
1217
+ raise AttributeError(item) # noqa: B904
1255
1218
 
1256
- def __setattr__(self, key: Any, value: Any) -> None:
1219
+ def __setattr__(self, key: Any, value: Any) -> None: # noqa: D105, ANN401
1257
1220
  if key != "__fields__":
1258
- raise RuntimeError("Row is read-only")
1221
+ msg = "Row is read-only"
1222
+ raise RuntimeError(msg)
1259
1223
  self.__dict__[key] = value
1260
1224
 
1261
1225
  def __reduce__(
1262
1226
  self,
1263
- ) -> Union[str, Tuple[Any, ...]]:
1227
+ ) -> Union[str, tuple[Any, ...]]:
1264
1228
  """Returns a tuple so Python knows how to pickle Row."""
1265
1229
  if hasattr(self, "__fields__"):
1266
1230
  return (_create_row, (self.__fields__, tuple(self)))
@@ -1270,6 +1234,6 @@ class Row(tuple):
1270
1234
  def __repr__(self) -> str:
1271
1235
  """Printable representation of Row used in Python REPL."""
1272
1236
  if hasattr(self, "__fields__"):
1273
- return "Row(%s)" % ", ".join("%s=%r" % (k, v) for k, v in zip(self.__fields__, tuple(self)))
1237
+ return "Row({})".format(", ".join(f"{k}={v!r}" for k, v in zip(self.__fields__, tuple(self))))
1274
1238
  else:
1275
- return "<Row(%s)>" % ", ".join("%r" % field for field in self)
1239
+ return "<Row({})>".format(", ".join(f"{field!r}" for field in self))