duckdb-1.4.1-cp39-cp39-macosx_10_9_universal2.whl → duckdb-1.5.0.dev44-cp39-cp39-macosx_10_9_universal2.whl

This diff compares the contents of two publicly released versions of the package as they appear in a supported public registry. It is provided for informational purposes only.

Potentially problematic release: this version of duckdb might be problematic (see the registry page for details).

Files changed (57)
  1. _duckdb.cpython-39-darwin.so +0 -0
  2. duckdb/__init__.py +435 -341
  3. duckdb/__init__.pyi +713 -0
  4. duckdb/bytes_io_wrapper.py +9 -12
  5. duckdb/experimental/__init__.py +1 -2
  6. duckdb/experimental/spark/__init__.py +4 -3
  7. duckdb/experimental/spark/_globals.py +8 -8
  8. duckdb/experimental/spark/_typing.py +9 -7
  9. duckdb/experimental/spark/conf.py +15 -16
  10. duckdb/experimental/spark/context.py +44 -60
  11. duckdb/experimental/spark/errors/__init__.py +35 -33
  12. duckdb/experimental/spark/errors/error_classes.py +1 -1
  13. duckdb/experimental/spark/errors/exceptions/__init__.py +1 -1
  14. duckdb/experimental/spark/errors/exceptions/base.py +88 -39
  15. duckdb/experimental/spark/errors/utils.py +16 -11
  16. duckdb/experimental/spark/exception.py +6 -9
  17. duckdb/experimental/spark/sql/__init__.py +5 -5
  18. duckdb/experimental/spark/sql/_typing.py +15 -8
  19. duckdb/experimental/spark/sql/catalog.py +20 -21
  20. duckdb/experimental/spark/sql/column.py +55 -48
  21. duckdb/experimental/spark/sql/conf.py +8 -9
  22. duckdb/experimental/spark/sql/dataframe.py +233 -185
  23. duckdb/experimental/spark/sql/functions.py +1248 -1222
  24. duckdb/experimental/spark/sql/group.py +52 -56
  25. duckdb/experimental/spark/sql/readwriter.py +94 -80
  26. duckdb/experimental/spark/sql/session.py +59 -64
  27. duckdb/experimental/spark/sql/streaming.py +10 -9
  28. duckdb/experimental/spark/sql/type_utils.py +65 -67
  29. duckdb/experimental/spark/sql/types.py +345 -309
  30. duckdb/experimental/spark/sql/udf.py +6 -6
  31. duckdb/filesystem.py +16 -26
  32. duckdb/functional/__init__.py +16 -12
  33. duckdb/functional/__init__.pyi +31 -0
  34. duckdb/polars_io.py +83 -130
  35. duckdb/query_graph/__main__.py +96 -91
  36. duckdb/typing/__init__.py +8 -18
  37. duckdb/typing/__init__.pyi +36 -0
  38. duckdb/udf.py +5 -10
  39. duckdb/value/__init__.py +0 -1
  40. duckdb/value/constant/__init__.py +60 -62
  41. duckdb/value/constant/__init__.pyi +115 -0
  42. duckdb-1.5.0.dev44.dist-info/METADATA +80 -0
  43. duckdb-1.5.0.dev44.dist-info/RECORD +47 -0
  44. _duckdb-stubs/__init__.pyi +0 -1443
  45. _duckdb-stubs/_func.pyi +0 -46
  46. _duckdb-stubs/_sqltypes.pyi +0 -75
  47. adbc_driver_duckdb/__init__.py +0 -50
  48. adbc_driver_duckdb/dbapi.py +0 -115
  49. duckdb/_dbapi_type_object.py +0 -231
  50. duckdb/_version.py +0 -22
  51. duckdb/func/__init__.py +0 -3
  52. duckdb/sqltypes/__init__.py +0 -63
  53. duckdb-1.4.1.dist-info/METADATA +0 -326
  54. duckdb-1.4.1.dist-info/RECORD +0 -52
  55. /duckdb/{py.typed → value/__init__.pyi} +0 -0
  56. {duckdb-1.4.1.dist-info → duckdb-1.5.0.dev44.dist-info}/WHEEL +0 -0
  57. {duckdb-1.4.1.dist-info → duckdb-1.5.0.dev44.dist-info}/licenses/LICENSE +0 -0
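
The hunks below appear to come from duckdb/experimental/spark/sql/types.py (entry 29 above); lines prefixed with '-' are from the released 1.4.1 wheel and lines prefixed with '+' are from the 1.5.0.dev44 pre-release. One layout change worth noting from the file list: the duckdb/sqltypes shim package present in 1.4.1 is gone in 1.5.0.dev44, and types.py goes back to importing DuckDBPyType from duckdb.typing. A minimal sketch of that import against the dev wheel (illustrative only, not part of the diff):

    # Minimal sketch, not part of the diff: how DuckDBPyType is reached in the
    # 1.5.0.dev44 layout, matching the `from duckdb.typing import DuckDBPyType`
    # line added in the first hunk below.
    from duckdb.typing import DuckDBPyType

    varchar = DuckDBPyType("VARCHAR")  # same construction StringType() performs internally
    print(varchar)
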
@@ -1,28 +1,28 @@
- # ruff: noqa: D100
- # This code is based on code from Apache Spark under the license found in the LICENSE
- # file located in the 'spark' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'spark' folder.

- import calendar
- import datetime
- import math
- import re
- import time
- from builtins import tuple
- from collections.abc import Iterator, Mapping
- from types import MappingProxyType
  from typing import (
-     Any,
-     ClassVar,
-     NoReturn,
-     Optional,
-     TypeVar,
-     Union,
      cast,
      overload,
+     Dict,
+     Optional,
+     List,
+     Tuple,
+     Any,
+     Union,
+     Type,
+     TypeVar,
+     ClassVar,
+     Iterator,
  )
+ from builtins import tuple
+ import datetime
+ import calendar
+ import time
+ import math
+ import re

  import duckdb
- from duckdb.sqltypes import DuckDBPyType
+ from duckdb.typing import DuckDBPyType

  from ..exception import ContributionsAcceptedError

@@ -30,100 +30,105 @@ T = TypeVar("T")
30
30
  U = TypeVar("U")
31
31
 
32
32
  __all__ = [
33
- "ArrayType",
33
+ "DataType",
34
+ "NullType",
35
+ "StringType",
34
36
  "BinaryType",
37
+ "UUIDType",
35
38
  "BitstringType",
36
39
  "BooleanType",
37
- "ByteType",
38
- "DataType",
39
40
  "DateType",
40
- "DayTimeIntervalType",
41
+ "TimestampType",
42
+ "TimestampNTZType",
43
+ "TimestampNanosecondNTZType",
44
+ "TimestampMilisecondNTZType",
45
+ "TimestampSecondNTZType",
46
+ "TimeType",
47
+ "TimeNTZType",
41
48
  "DecimalType",
42
49
  "DoubleType",
43
50
  "FloatType",
44
- "HugeIntegerType",
51
+ "ByteType",
52
+ "UnsignedByteType",
53
+ "ShortType",
54
+ "UnsignedShortType",
45
55
  "IntegerType",
56
+ "UnsignedIntegerType",
46
57
  "LongType",
47
- "MapType",
48
- "NullType",
58
+ "UnsignedLongType",
59
+ "HugeIntegerType",
60
+ "UnsignedHugeIntegerType",
61
+ "DayTimeIntervalType",
49
62
  "Row",
50
- "ShortType",
51
- "StringType",
63
+ "ArrayType",
64
+ "MapType",
52
65
  "StructField",
53
66
  "StructType",
54
- "TimeNTZType",
55
- "TimeType",
56
- "TimestampMilisecondNTZType",
57
- "TimestampNTZType",
58
- "TimestampNanosecondNTZType",
59
- "TimestampSecondNTZType",
60
- "TimestampType",
61
- "UUIDType",
62
- "UnsignedByteType",
63
- "UnsignedHugeIntegerType",
64
- "UnsignedIntegerType",
65
- "UnsignedLongType",
66
- "UnsignedShortType",
67
67
  ]
68
68
 
69
69
 
70
70
  class DataType:
71
71
  """Base class for data types."""
72
72
 
73
- def __init__(self, duckdb_type: DuckDBPyType) -> None: # noqa: D107
73
+ def __init__(self, duckdb_type):
74
74
  self.duckdb_type = duckdb_type
75
75
 
76
- def __repr__(self) -> str: # noqa: D105
76
+ def __repr__(self) -> str:
77
77
  return self.__class__.__name__ + "()"
78
78
 
79
- def __hash__(self) -> int: # noqa: D105
79
+ def __hash__(self) -> int:
80
80
  return hash(str(self))
81
81
 
82
- def __eq__(self, other: object) -> bool: # noqa: D105
82
+ def __eq__(self, other: Any) -> bool:
83
83
  return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
84
84
 
85
- def __ne__(self, other: object) -> bool: # noqa: D105
85
+ def __ne__(self, other: Any) -> bool:
86
86
  return not self.__eq__(other)
87
87
 
88
88
  @classmethod
89
- def typeName(cls) -> str: # noqa: D102
89
+ def typeName(cls) -> str:
90
90
  return cls.__name__[:-4].lower()
91
91
 
92
- def simpleString(self) -> str: # noqa: D102
92
+ def simpleString(self) -> str:
93
93
  return self.typeName()
94
94
 
95
- def jsonValue(self) -> Union[str, dict[str, Any]]: # noqa: D102
95
+ def jsonValue(self) -> Union[str, Dict[str, Any]]:
96
96
  raise ContributionsAcceptedError
97
97
 
98
- def json(self) -> str: # noqa: D102
98
+ def json(self) -> str:
99
99
  raise ContributionsAcceptedError
100
100
 
101
101
  def needConversion(self) -> bool:
102
- """Does this type needs conversion between Python object and internal SQL object.
102
+ """
103
+ Does this type needs conversion between Python object and internal SQL object.
103
104
 
104
105
  This is used to avoid the unnecessary conversion for ArrayType/MapType/StructType.
105
106
  """
106
107
  return False
107
108
 
108
- def toInternal(self, obj: Any) -> Any: # noqa: ANN401
109
- """Converts a Python object into an internal SQL object."""
109
+ def toInternal(self, obj: Any) -> Any:
110
+ """
111
+ Converts a Python object into an internal SQL object.
112
+ """
110
113
  return obj
111
114
 
112
- def fromInternal(self, obj: Any) -> Any: # noqa: ANN401
113
- """Converts an internal SQL object into a native Python object."""
115
+ def fromInternal(self, obj: Any) -> Any:
116
+ """
117
+ Converts an internal SQL object into a native Python object.
118
+ """
114
119
  return obj
115
120
 
116
121
 
117
122
  # This singleton pattern does not work with pickle, you will get
118
123
  # another object after pickle and unpickle
119
124
  class DataTypeSingleton(type):
120
- """Metaclass for DataType."""
125
+ """Metaclass for DataType"""
121
126
 
122
- _instances: ClassVar[dict[type["DataTypeSingleton"], "DataTypeSingleton"]] = {}
127
+ _instances: ClassVar[Dict[Type["DataTypeSingleton"], "DataTypeSingleton"]] = {}
123
128
 
124
- def __call__(cls: type[T]) -> T: # type: ignore[override]
129
+ def __call__(cls: Type[T]) -> T: # type: ignore[override]
125
130
  if cls not in cls._instances: # type: ignore[attr-defined]
126
- cls._instances[cls] = super().__call__() # type: ignore[misc, attr-defined]
131
+ cls._instances[cls] = super(DataTypeSingleton, cls).__call__() # type: ignore[misc, attr-defined]
127
132
  return cls._instances[cls] # type: ignore[attr-defined]
128
133
 
129
134
 
@@ -133,18 +138,17 @@ class NullType(DataType, metaclass=DataTypeSingleton):
133
138
  The data type representing None, used for the types that cannot be inferred.
134
139
  """
135
140
 
136
- def __init__(self) -> None: # noqa: D107
141
+ def __init__(self):
137
142
  super().__init__(DuckDBPyType("NULL"))
138
143
 
139
144
  @classmethod
140
- def typeName(cls) -> str: # noqa: D102
145
+ def typeName(cls) -> str:
141
146
  return "void"
142
147
 
143
148
 
144
149
  class AtomicType(DataType):
145
150
  """An internal type used to represent everything that is not
146
- null, UDTs, arrays, structs, and maps.
147
- """ # noqa: D205
151
+ null, UDTs, arrays, structs, and maps."""
148
152
 
149
153
 
150
154
  class NumericType(AtomicType):
@@ -162,54 +166,54 @@ class FractionalType(NumericType):
162
166
  class StringType(AtomicType, metaclass=DataTypeSingleton):
163
167
  """String data type."""
164
168
 
165
- def __init__(self) -> None: # noqa: D107
169
+ def __init__(self):
166
170
  super().__init__(DuckDBPyType("VARCHAR"))
167
171
 
168
172
 
169
173
  class BitstringType(AtomicType, metaclass=DataTypeSingleton):
170
174
  """Bitstring data type."""
171
175
 
172
- def __init__(self) -> None: # noqa: D107
176
+ def __init__(self):
173
177
  super().__init__(DuckDBPyType("BIT"))
174
178
 
175
179
 
176
180
  class UUIDType(AtomicType, metaclass=DataTypeSingleton):
177
181
  """UUID data type."""
178
182
 
179
- def __init__(self) -> None: # noqa: D107
183
+ def __init__(self):
180
184
  super().__init__(DuckDBPyType("UUID"))
181
185
 
182
186
 
183
187
  class BinaryType(AtomicType, metaclass=DataTypeSingleton):
184
188
  """Binary (byte array) data type."""
185
189
 
186
- def __init__(self) -> None: # noqa: D107
190
+ def __init__(self):
187
191
  super().__init__(DuckDBPyType("BLOB"))
188
192
 
189
193
 
190
194
  class BooleanType(AtomicType, metaclass=DataTypeSingleton):
191
195
  """Boolean data type."""
192
196
 
193
- def __init__(self) -> None: # noqa: D107
197
+ def __init__(self):
194
198
  super().__init__(DuckDBPyType("BOOLEAN"))
195
199
 
196
200
 
197
201
  class DateType(AtomicType, metaclass=DataTypeSingleton):
198
202
  """Date (datetime.date) data type."""
199
203
 
200
- def __init__(self) -> None: # noqa: D107
204
+ def __init__(self):
201
205
  super().__init__(DuckDBPyType("DATE"))
202
206
 
203
207
  EPOCH_ORDINAL = datetime.datetime(1970, 1, 1).toordinal()
204
208
 
205
- def needConversion(self) -> bool: # noqa: D102
209
+ def needConversion(self) -> bool:
206
210
  return True
207
211
 
208
- def toInternal(self, d: datetime.date) -> int: # noqa: D102
212
+ def toInternal(self, d: datetime.date) -> int:
209
213
  if d is not None:
210
214
  return d.toordinal() - self.EPOCH_ORDINAL
211
215
 
212
- def fromInternal(self, v: int) -> datetime.date: # noqa: D102
216
+ def fromInternal(self, v: int) -> datetime.date:
213
217
  if v is not None:
214
218
  return datetime.date.fromordinal(v + self.EPOCH_ORDINAL)
215
219
 
@@ -217,22 +221,22 @@ class DateType(AtomicType, metaclass=DataTypeSingleton):
217
221
  class TimestampType(AtomicType, metaclass=DataTypeSingleton):
218
222
  """Timestamp (datetime.datetime) data type."""
219
223
 
220
- def __init__(self) -> None: # noqa: D107
224
+ def __init__(self):
221
225
  super().__init__(DuckDBPyType("TIMESTAMPTZ"))
222
226
 
223
227
  @classmethod
224
- def typeName(cls) -> str: # noqa: D102
228
+ def typeName(cls) -> str:
225
229
  return "timestamptz"
226
230
 
227
- def needConversion(self) -> bool: # noqa: D102
231
+ def needConversion(self) -> bool:
228
232
  return True
229
233
 
230
- def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
234
+ def toInternal(self, dt: datetime.datetime) -> int:
231
235
  if dt is not None:
232
236
  seconds = calendar.timegm(dt.utctimetuple()) if dt.tzinfo else time.mktime(dt.timetuple())
233
237
  return int(seconds) * 1000000 + dt.microsecond
234
238
 
235
- def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
239
+ def fromInternal(self, ts: int) -> datetime.datetime:
236
240
  if ts is not None:
237
241
  # using int to avoid precision loss in float
238
242
  return datetime.datetime.fromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000)
@@ -241,22 +245,22 @@ class TimestampType(AtomicType, metaclass=DataTypeSingleton):
241
245
  class TimestampNTZType(AtomicType, metaclass=DataTypeSingleton):
242
246
  """Timestamp (datetime.datetime) data type without timezone information with microsecond precision."""
243
247
 
244
- def __init__(self) -> None: # noqa: D107
248
+ def __init__(self):
245
249
  super().__init__(DuckDBPyType("TIMESTAMP"))
246
250
 
247
- def needConversion(self) -> bool: # noqa: D102
251
+ def needConversion(self) -> bool:
248
252
  return True
249
253
 
250
254
  @classmethod
251
- def typeName(cls) -> str: # noqa: D102
255
+ def typeName(cls) -> str:
252
256
  return "timestamp"
253
257
 
254
- def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
258
+ def toInternal(self, dt: datetime.datetime) -> int:
255
259
  if dt is not None:
256
260
  seconds = calendar.timegm(dt.timetuple())
257
261
  return int(seconds) * 1000000 + dt.microsecond
258
262
 
259
- def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
263
+ def fromInternal(self, ts: int) -> datetime.datetime:
260
264
  if ts is not None:
261
265
  # using int to avoid precision loss in float
262
266
  return datetime.datetime.utcfromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000)
@@ -265,60 +269,60 @@ class TimestampNTZType(AtomicType, metaclass=DataTypeSingleton):
265
269
  class TimestampSecondNTZType(AtomicType, metaclass=DataTypeSingleton):
266
270
  """Timestamp (datetime.datetime) data type without timezone information with second precision."""
267
271
 
268
- def __init__(self) -> None: # noqa: D107
272
+ def __init__(self):
269
273
  super().__init__(DuckDBPyType("TIMESTAMP_S"))
270
274
 
271
- def needConversion(self) -> bool: # noqa: D102
275
+ def needConversion(self) -> bool:
272
276
  return True
273
277
 
274
278
  @classmethod
275
- def typeName(cls) -> str: # noqa: D102
279
+ def typeName(cls) -> str:
276
280
  return "timestamp_s"
277
281
 
278
- def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
282
+ def toInternal(self, dt: datetime.datetime) -> int:
279
283
  raise ContributionsAcceptedError
280
284
 
281
- def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
285
+ def fromInternal(self, ts: int) -> datetime.datetime:
282
286
  raise ContributionsAcceptedError
283
287
 
284
288
 
285
289
  class TimestampMilisecondNTZType(AtomicType, metaclass=DataTypeSingleton):
286
290
  """Timestamp (datetime.datetime) data type without timezone information with milisecond precision."""
287
291
 
288
- def __init__(self) -> None: # noqa: D107
292
+ def __init__(self):
289
293
  super().__init__(DuckDBPyType("TIMESTAMP_MS"))
290
294
 
291
- def needConversion(self) -> bool: # noqa: D102
295
+ def needConversion(self) -> bool:
292
296
  return True
293
297
 
294
298
  @classmethod
295
- def typeName(cls) -> str: # noqa: D102
299
+ def typeName(cls) -> str:
296
300
  return "timestamp_ms"
297
301
 
298
- def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
302
+ def toInternal(self, dt: datetime.datetime) -> int:
299
303
  raise ContributionsAcceptedError
300
304
 
301
- def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
305
+ def fromInternal(self, ts: int) -> datetime.datetime:
302
306
  raise ContributionsAcceptedError
303
307
 
304
308
 
305
309
  class TimestampNanosecondNTZType(AtomicType, metaclass=DataTypeSingleton):
306
310
  """Timestamp (datetime.datetime) data type without timezone information with nanosecond precision."""
307
311
 
308
- def __init__(self) -> None: # noqa: D107
312
+ def __init__(self):
309
313
  super().__init__(DuckDBPyType("TIMESTAMP_NS"))
310
314
 
311
- def needConversion(self) -> bool: # noqa: D102
315
+ def needConversion(self) -> bool:
312
316
  return True
313
317
 
314
318
  @classmethod
315
- def typeName(cls) -> str: # noqa: D102
319
+ def typeName(cls) -> str:
316
320
  return "timestamp_ns"
317
321
 
318
- def toInternal(self, dt: datetime.datetime) -> int: # noqa: D102
322
+ def toInternal(self, dt: datetime.datetime) -> int:
319
323
  raise ContributionsAcceptedError
320
324
 
321
- def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
325
+ def fromInternal(self, ts: int) -> datetime.datetime:
322
326
  raise ContributionsAcceptedError
323
327
 
324
328
 
@@ -342,90 +346,90 @@ class DecimalType(FractionalType):
342
346
  the number of digits on right side of dot. (default: 0)
343
347
  """
344
348
 
345
- def __init__(self, precision: int = 10, scale: int = 0) -> None: # noqa: D107
349
+ def __init__(self, precision: int = 10, scale: int = 0):
346
350
  super().__init__(duckdb.decimal_type(precision, scale))
347
351
  self.precision = precision
348
352
  self.scale = scale
349
353
  self.hasPrecisionInfo = True # this is a public API
350
354
 
351
- def simpleString(self) -> str: # noqa: D102
352
- return f"decimal({int(self.precision):d},{int(self.scale):d})"
355
+ def simpleString(self) -> str:
356
+ return "decimal(%d,%d)" % (self.precision, self.scale)
353
357
 
354
- def __repr__(self) -> str: # noqa: D105
355
- return f"DecimalType({int(self.precision):d},{int(self.scale):d})"
358
+ def __repr__(self) -> str:
359
+ return "DecimalType(%d,%d)" % (self.precision, self.scale)
356
360
 
357
361
 
358
362
  class DoubleType(FractionalType, metaclass=DataTypeSingleton):
359
363
  """Double data type, representing double precision floats."""
360
364
 
361
- def __init__(self) -> None: # noqa: D107
365
+ def __init__(self):
362
366
  super().__init__(DuckDBPyType("DOUBLE"))
363
367
 
364
368
 
365
369
  class FloatType(FractionalType, metaclass=DataTypeSingleton):
366
370
  """Float data type, representing single precision floats."""
367
371
 
368
- def __init__(self) -> None: # noqa: D107
372
+ def __init__(self):
369
373
  super().__init__(DuckDBPyType("FLOAT"))
370
374
 
371
375
 
372
376
  class ByteType(IntegralType):
373
377
  """Byte data type, i.e. a signed integer in a single byte."""
374
378
 
375
- def __init__(self) -> None: # noqa: D107
379
+ def __init__(self):
376
380
  super().__init__(DuckDBPyType("TINYINT"))
377
381
 
378
- def simpleString(self) -> str: # noqa: D102
382
+ def simpleString(self) -> str:
379
383
  return "tinyint"
380
384
 
381
385
 
382
386
  class UnsignedByteType(IntegralType):
383
387
  """Unsigned byte data type, i.e. a unsigned integer in a single byte."""
384
388
 
385
- def __init__(self) -> None: # noqa: D107
389
+ def __init__(self):
386
390
  super().__init__(DuckDBPyType("UTINYINT"))
387
391
 
388
- def simpleString(self) -> str: # noqa: D102
392
+ def simpleString(self) -> str:
389
393
  return "utinyint"
390
394
 
391
395
 
392
396
  class ShortType(IntegralType):
393
397
  """Short data type, i.e. a signed 16-bit integer."""
394
398
 
395
- def __init__(self) -> None: # noqa: D107
399
+ def __init__(self):
396
400
  super().__init__(DuckDBPyType("SMALLINT"))
397
401
 
398
- def simpleString(self) -> str: # noqa: D102
402
+ def simpleString(self) -> str:
399
403
  return "smallint"
400
404
 
401
405
 
402
406
  class UnsignedShortType(IntegralType):
403
407
  """Unsigned short data type, i.e. a unsigned 16-bit integer."""
404
408
 
405
- def __init__(self) -> None: # noqa: D107
409
+ def __init__(self):
406
410
  super().__init__(DuckDBPyType("USMALLINT"))
407
411
 
408
- def simpleString(self) -> str: # noqa: D102
412
+ def simpleString(self) -> str:
409
413
  return "usmallint"
410
414
 
411
415
 
412
416
  class IntegerType(IntegralType):
413
417
  """Int data type, i.e. a signed 32-bit integer."""
414
418
 
415
- def __init__(self) -> None: # noqa: D107
419
+ def __init__(self):
416
420
  super().__init__(DuckDBPyType("INTEGER"))
417
421
 
418
- def simpleString(self) -> str: # noqa: D102
422
+ def simpleString(self) -> str:
419
423
  return "integer"
420
424
 
421
425
 
422
426
  class UnsignedIntegerType(IntegralType):
423
427
  """Unsigned int data type, i.e. a unsigned 32-bit integer."""
424
428
 
425
- def __init__(self) -> None: # noqa: D107
429
+ def __init__(self):
426
430
  super().__init__(DuckDBPyType("UINTEGER"))
427
431
 
428
- def simpleString(self) -> str: # noqa: D102
432
+ def simpleString(self) -> str:
429
433
  return "uinteger"
430
434
 
431
435
 
@@ -436,10 +440,10 @@ class LongType(IntegralType):
436
440
  please use :class:`DecimalType`.
437
441
  """
438
442
 
439
- def __init__(self) -> None: # noqa: D107
443
+ def __init__(self):
440
444
  super().__init__(DuckDBPyType("BIGINT"))
441
445
 
442
- def simpleString(self) -> str: # noqa: D102
446
+ def simpleString(self) -> str:
443
447
  return "bigint"
444
448
 
445
449
 
@@ -450,24 +454,24 @@ class UnsignedLongType(IntegralType):
450
454
  please use :class:`HugeIntegerType`.
451
455
  """
452
456
 
453
- def __init__(self) -> None: # noqa: D107
457
+ def __init__(self):
454
458
  super().__init__(DuckDBPyType("UBIGINT"))
455
459
 
456
- def simpleString(self) -> str: # noqa: D102
460
+ def simpleString(self) -> str:
457
461
  return "ubigint"
458
462
 
459
463
 
460
464
  class HugeIntegerType(IntegralType):
461
465
  """Huge integer data type, i.e. a signed 128-bit integer.
462
466
 
463
- If the values are beyond the range of [-170141183460469231731687303715884105728,
464
- 170141183460469231731687303715884105727], please use :class:`DecimalType`.
467
+ If the values are beyond the range of [-170141183460469231731687303715884105728, 170141183460469231731687303715884105727],
468
+ please use :class:`DecimalType`.
465
469
  """
466
470
 
467
- def __init__(self) -> None: # noqa: D107
471
+ def __init__(self):
468
472
  super().__init__(DuckDBPyType("HUGEINT"))
469
473
 
470
- def simpleString(self) -> str: # noqa: D102
474
+ def simpleString(self) -> str:
471
475
  return "hugeint"
472
476
 
473
477
 
@@ -478,30 +482,30 @@ class UnsignedHugeIntegerType(IntegralType):
478
482
  please use :class:`DecimalType`.
479
483
  """
480
484
 
481
- def __init__(self) -> None: # noqa: D107
485
+ def __init__(self):
482
486
  super().__init__(DuckDBPyType("UHUGEINT"))
483
487
 
484
- def simpleString(self) -> str: # noqa: D102
488
+ def simpleString(self) -> str:
485
489
  return "uhugeint"
486
490
 
487
491
 
488
492
  class TimeType(IntegralType):
489
493
  """Time (datetime.time) data type."""
490
494
 
491
- def __init__(self) -> None: # noqa: D107
495
+ def __init__(self):
492
496
  super().__init__(DuckDBPyType("TIMETZ"))
493
497
 
494
- def simpleString(self) -> str: # noqa: D102
498
+ def simpleString(self) -> str:
495
499
  return "timetz"
496
500
 
497
501
 
498
502
  class TimeNTZType(IntegralType):
499
503
  """Time (datetime.time) data type without timezone information."""
500
504
 
501
- def __init__(self) -> None: # noqa: D107
505
+ def __init__(self):
502
506
  super().__init__(DuckDBPyType("TIME"))
503
507
 
504
- def simpleString(self) -> str: # noqa: D102
508
+ def simpleString(self) -> str:
505
509
  return "time"
506
510
 
507
511
 
@@ -513,18 +517,16 @@ class DayTimeIntervalType(AtomicType):
513
517
  MINUTE = 2
514
518
  SECOND = 3
515
519
 
516
- _fields: Mapping[str, int] = MappingProxyType(
517
- {
518
- DAY: "day",
519
- HOUR: "hour",
520
- MINUTE: "minute",
521
- SECOND: "second",
522
- }
523
- )
520
+ _fields = {
521
+ DAY: "day",
522
+ HOUR: "hour",
523
+ MINUTE: "minute",
524
+ SECOND: "second",
525
+ }
524
526
 
525
- _inverted_fields: Mapping[int, str] = MappingProxyType(dict(zip(_fields.values(), _fields.keys())))
527
+ _inverted_fields = dict(zip(_fields.values(), _fields.keys()))
526
528
 
527
- def __init__(self, startField: Optional[int] = None, endField: Optional[int] = None) -> None: # noqa: D107
529
+ def __init__(self, startField: Optional[int] = None, endField: Optional[int] = None):
528
530
  super().__init__(DuckDBPyType("INTERVAL"))
529
531
  if startField is None and endField is None:
530
532
  # Default matched to scala side.
@@ -534,34 +536,33 @@ class DayTimeIntervalType(AtomicType):
534
536
  endField = startField
535
537
 
536
538
  fields = DayTimeIntervalType._fields
537
- if startField not in fields or endField not in fields:
538
- msg = f"interval {startField} to {endField} is invalid"
539
- raise RuntimeError(msg)
540
- self.startField = cast("int", startField)
541
- self.endField = cast("int", endField)
539
+ if startField not in fields.keys() or endField not in fields.keys():
540
+ raise RuntimeError("interval %s to %s is invalid" % (startField, endField))
541
+ self.startField = cast(int, startField)
542
+ self.endField = cast(int, endField)
542
543
 
543
544
  def _str_repr(self) -> str:
544
545
  fields = DayTimeIntervalType._fields
545
546
  start_field_name = fields[self.startField]
546
547
  end_field_name = fields[self.endField]
547
548
  if start_field_name == end_field_name:
548
- return f"interval {start_field_name}"
549
+ return "interval %s" % start_field_name
549
550
  else:
550
- return f"interval {start_field_name} to {end_field_name}"
551
+ return "interval %s to %s" % (start_field_name, end_field_name)
551
552
 
552
553
  simpleString = _str_repr
553
554
 
554
- def __repr__(self) -> str: # noqa: D105
555
- return f"{type(self).__name__}({int(self.startField):d}, {int(self.endField):d})"
555
+ def __repr__(self) -> str:
556
+ return "%s(%d, %d)" % (type(self).__name__, self.startField, self.endField)
556
557
 
557
- def needConversion(self) -> bool: # noqa: D102
558
+ def needConversion(self) -> bool:
558
559
  return True
559
560
 
560
- def toInternal(self, dt: datetime.timedelta) -> Optional[int]: # noqa: D102
561
+ def toInternal(self, dt: datetime.timedelta) -> Optional[int]:
561
562
  if dt is not None:
562
563
  return (math.floor(dt.total_seconds()) * 1000000) + dt.microseconds
563
564
 
564
- def fromInternal(self, micros: int) -> Optional[datetime.timedelta]: # noqa: D102
565
+ def fromInternal(self, micros: int) -> Optional[datetime.timedelta]:
565
566
  if micros is not None:
566
567
  return datetime.timedelta(microseconds=micros)
567
568
 
@@ -576,7 +577,7 @@ class ArrayType(DataType):
576
577
  containsNull : bool, optional
577
578
  whether the array can contain null (None) values.
578
579
 
579
- Examples:
580
+ Examples
580
581
  --------
581
582
  >>> ArrayType(StringType()) == ArrayType(StringType(), True)
582
583
  True
@@ -584,27 +585,30 @@ class ArrayType(DataType):
584
585
  False
585
586
  """
586
587
 
587
- def __init__(self, elementType: DataType, containsNull: bool = True) -> None: # noqa: D107
588
+ def __init__(self, elementType: DataType, containsNull: bool = True):
588
589
  super().__init__(duckdb.list_type(elementType.duckdb_type))
589
- assert isinstance(elementType, DataType), f"elementType {elementType} should be an instance of {DataType}"
590
+ assert isinstance(elementType, DataType), "elementType %s should be an instance of %s" % (
591
+ elementType,
592
+ DataType,
593
+ )
590
594
  self.elementType = elementType
591
595
  self.containsNull = containsNull
592
596
 
593
- def simpleString(self) -> str: # noqa: D102
594
- return f"array<{self.elementType.simpleString()}>"
597
+ def simpleString(self) -> str:
598
+ return "array<%s>" % self.elementType.simpleString()
595
599
 
596
- def __repr__(self) -> str: # noqa: D105
597
- return f"ArrayType({self.elementType}, {self.containsNull!s})"
600
+ def __repr__(self) -> str:
601
+ return "ArrayType(%s, %s)" % (self.elementType, str(self.containsNull))
598
602
 
599
- def needConversion(self) -> bool: # noqa: D102
603
+ def needConversion(self) -> bool:
600
604
  return self.elementType.needConversion()
601
605
 
602
- def toInternal(self, obj: list[Optional[T]]) -> list[Optional[T]]: # noqa: D102
606
+ def toInternal(self, obj: List[Optional[T]]) -> List[Optional[T]]:
603
607
  if not self.needConversion():
604
608
  return obj
605
609
  return obj and [self.elementType.toInternal(v) for v in obj]
606
610
 
607
- def fromInternal(self, obj: list[Optional[T]]) -> list[Optional[T]]: # noqa: D102
611
+ def fromInternal(self, obj: List[Optional[T]]) -> List[Optional[T]]:
608
612
  if not self.needConversion():
609
613
  return obj
610
614
  return obj and [self.elementType.fromInternal(v) for v in obj]
@@ -622,44 +626,59 @@ class MapType(DataType):
622
626
  valueContainsNull : bool, optional
623
627
  indicates whether values can contain null (None) values.
624
628
 
625
- Notes:
629
+ Notes
626
630
  -----
627
631
  Keys in a map data type are not allowed to be null (None).
628
632
 
629
- Examples:
633
+ Examples
630
634
  --------
631
- >>> (MapType(StringType(), IntegerType()) == MapType(StringType(), IntegerType(), True))
635
+ >>> (MapType(StringType(), IntegerType())
636
+ ... == MapType(StringType(), IntegerType(), True))
632
637
  True
633
- >>> (MapType(StringType(), IntegerType(), False) == MapType(StringType(), FloatType()))
638
+ >>> (MapType(StringType(), IntegerType(), False)
639
+ ... == MapType(StringType(), FloatType()))
634
640
  False
635
641
  """
636
642
 
637
- def __init__(self, keyType: DataType, valueType: DataType, valueContainsNull: bool = True) -> None: # noqa: D107
643
+ def __init__(self, keyType: DataType, valueType: DataType, valueContainsNull: bool = True):
638
644
  super().__init__(duckdb.map_type(keyType.duckdb_type, valueType.duckdb_type))
639
- assert isinstance(keyType, DataType), f"keyType {keyType} should be an instance of {DataType}"
640
- assert isinstance(valueType, DataType), f"valueType {valueType} should be an instance of {DataType}"
645
+ assert isinstance(keyType, DataType), "keyType %s should be an instance of %s" % (
646
+ keyType,
647
+ DataType,
648
+ )
649
+ assert isinstance(valueType, DataType), "valueType %s should be an instance of %s" % (
650
+ valueType,
651
+ DataType,
652
+ )
641
653
  self.keyType = keyType
642
654
  self.valueType = valueType
643
655
  self.valueContainsNull = valueContainsNull
644
656
 
645
- def simpleString(self) -> str: # noqa: D102
646
- return f"map<{self.keyType.simpleString()},{self.valueType.simpleString()}>"
657
+ def simpleString(self) -> str:
658
+ return "map<%s,%s>" % (
659
+ self.keyType.simpleString(),
660
+ self.valueType.simpleString(),
661
+ )
647
662
 
648
- def __repr__(self) -> str: # noqa: D105
649
- return f"MapType({self.keyType}, {self.valueType}, {self.valueContainsNull!s})"
663
+ def __repr__(self) -> str:
664
+ return "MapType(%s, %s, %s)" % (
665
+ self.keyType,
666
+ self.valueType,
667
+ str(self.valueContainsNull),
668
+ )
650
669
 
651
- def needConversion(self) -> bool: # noqa: D102
670
+ def needConversion(self) -> bool:
652
671
  return self.keyType.needConversion() or self.valueType.needConversion()
653
672
 
654
- def toInternal(self, obj: dict[T, Optional[U]]) -> dict[T, Optional[U]]: # noqa: D102
673
+ def toInternal(self, obj: Dict[T, Optional[U]]) -> Dict[T, Optional[U]]:
655
674
  if not self.needConversion():
656
675
  return obj
657
- return obj and {self.keyType.toInternal(k): self.valueType.toInternal(v) for k, v in obj.items()}
676
+ return obj and dict((self.keyType.toInternal(k), self.valueType.toInternal(v)) for k, v in obj.items())
658
677
 
659
- def fromInternal(self, obj: dict[T, Optional[U]]) -> dict[T, Optional[U]]: # noqa: D102
678
+ def fromInternal(self, obj: Dict[T, Optional[U]]) -> Dict[T, Optional[U]]:
660
679
  if not self.needConversion():
661
680
  return obj
662
- return obj and {self.keyType.fromInternal(k): self.valueType.fromInternal(v) for k, v in obj.items()}
681
+ return obj and dict((self.keyType.fromInternal(k), self.valueType.fromInternal(v)) for k, v in obj.items())
663
682
 
664
683
 
665
684
  class StructField(DataType):
@@ -676,58 +695,66 @@ class StructField(DataType):
676
695
  metadata : dict, optional
677
696
  a dict from string to simple type that can be toInternald to JSON automatically
678
697
 
679
- Examples:
698
+ Examples
680
699
  --------
681
- >>> (StructField("f1", StringType(), True) == StructField("f1", StringType(), True))
700
+ >>> (StructField("f1", StringType(), True)
701
+ ... == StructField("f1", StringType(), True))
682
702
  True
683
- >>> (StructField("f1", StringType(), True) == StructField("f2", StringType(), True))
703
+ >>> (StructField("f1", StringType(), True)
704
+ ... == StructField("f2", StringType(), True))
684
705
  False
685
706
  """
686
707
 
687
- def __init__( # noqa: D107
708
+ def __init__(
688
709
  self,
689
710
  name: str,
690
711
  dataType: DataType,
691
712
  nullable: bool = True,
692
- metadata: Optional[dict[str, Any]] = None,
693
- ) -> None:
713
+ metadata: Optional[Dict[str, Any]] = None,
714
+ ):
694
715
  super().__init__(dataType.duckdb_type)
695
- assert isinstance(dataType, DataType), f"dataType {dataType} should be an instance of {DataType}"
696
- assert isinstance(name, str), f"field name {name} should be a string"
716
+ assert isinstance(dataType, DataType), "dataType %s should be an instance of %s" % (
717
+ dataType,
718
+ DataType,
719
+ )
720
+ assert isinstance(name, str), "field name %s should be a string" % (name)
697
721
  self.name = name
698
722
  self.dataType = dataType
699
723
  self.nullable = nullable
700
724
  self.metadata = metadata or {}
701
725
 
702
- def simpleString(self) -> str: # noqa: D102
703
- return f"{self.name}:{self.dataType.simpleString()}"
726
+ def simpleString(self) -> str:
727
+ return "%s:%s" % (self.name, self.dataType.simpleString())
704
728
 
705
- def __repr__(self) -> str: # noqa: D105
706
- return f"StructField('{self.name}', {self.dataType}, {self.nullable!s})"
729
+ def __repr__(self) -> str:
730
+ return "StructField('%s', %s, %s)" % (
731
+ self.name,
732
+ self.dataType,
733
+ str(self.nullable),
734
+ )
707
735
 
708
- def needConversion(self) -> bool: # noqa: D102
736
+ def needConversion(self) -> bool:
709
737
  return self.dataType.needConversion()
710
738
 
711
- def toInternal(self, obj: T) -> T: # noqa: D102
739
+ def toInternal(self, obj: T) -> T:
712
740
  return self.dataType.toInternal(obj)
713
741
 
714
- def fromInternal(self, obj: T) -> T: # noqa: D102
742
+ def fromInternal(self, obj: T) -> T:
715
743
  return self.dataType.fromInternal(obj)
716
744
 
717
- def typeName(self) -> str: # type: ignore[override] # noqa: D102
718
- msg = "StructField does not have typeName. Use typeName on its type explicitly instead."
719
- raise TypeError(msg)
745
+ def typeName(self) -> str: # type: ignore[override]
746
+ raise TypeError("StructField does not have typeName. " "Use typeName on its type explicitly instead.")
720
747
 
721
748
 
722
749
  class StructType(DataType):
723
- r"""Struct type, consisting of a list of :class:`StructField`.
750
+ """Struct type, consisting of a list of :class:`StructField`.
724
751
 
725
752
  This is the data type representing a :class:`Row`.
726
753
 
727
754
  Iterating a :class:`StructType` will iterate over its :class:`StructField`\\s.
728
755
  A contained :class:`StructField` can be accessed by its name or position.
729
756
 
730
- Examples:
757
+ Examples
731
758
  --------
732
759
  >>> struct1 = StructType([StructField("f1", StringType(), True)])
733
760
  >>> struct1["f1"]
@@ -740,17 +767,16 @@ class StructType(DataType):
740
767
  >>> struct1 == struct2
741
768
  True
742
769
  >>> struct1 = StructType([StructField("f1", StringType(), True)])
743
- >>> struct2 = StructType(
744
- ... [StructField("f1", StringType(), True), StructField("f2", IntegerType(), False)]
745
- ... )
770
+ >>> struct2 = StructType([StructField("f1", StringType(), True),
771
+ ... StructField("f2", IntegerType(), False)])
746
772
  >>> struct1 == struct2
747
773
  False
748
774
  """
749
775
 
750
- def _update_internal_duckdb_type(self) -> None:
776
+ def _update_internal_duckdb_type(self):
751
777
  self.duckdb_type = duckdb.struct_type(dict(zip(self.names, [x.duckdb_type for x in self.fields])))
752
778
 
753
- def __init__(self, fields: Optional[list[StructField]] = None) -> None: # noqa: D107
779
+ def __init__(self, fields: Optional[List[StructField]] = None):
754
780
  if not fields:
755
781
  self.fields = []
756
782
  self.names = []
@@ -769,20 +795,23 @@ class StructType(DataType):
769
795
  field: str,
770
796
  data_type: Union[str, DataType],
771
797
  nullable: bool = True,
772
- metadata: Optional[dict[str, Any]] = None,
773
- ) -> "StructType": ...
798
+ metadata: Optional[Dict[str, Any]] = None,
799
+ ) -> "StructType":
800
+ ...
774
801
 
775
802
  @overload
776
- def add(self, field: StructField) -> "StructType": ...
803
+ def add(self, field: StructField) -> "StructType":
804
+ ...
777
805
 
778
806
  def add(
779
807
  self,
780
808
  field: Union[str, StructField],
781
809
  data_type: Optional[Union[str, DataType]] = None,
782
810
  nullable: bool = True,
783
- metadata: Optional[dict[str, Any]] = None,
811
+ metadata: Optional[Dict[str, Any]] = None,
784
812
  ) -> "StructType":
785
- r"""Construct a :class:`StructType` by adding new elements to it, to define the schema.
813
+ """
814
+ Construct a :class:`StructType` by adding new elements to it, to define the schema.
786
815
  The method accepts either:
787
816
 
788
817
  a) A single parameter which is a :class:`StructField` object.
@@ -801,11 +830,11 @@ class StructType(DataType):
801
830
  metadata : dict, optional
802
831
  Any additional metadata (default None)
803
832
 
804
- Returns:
833
+ Returns
805
834
  -------
806
835
  :class:`StructType`
807
836
 
808
- Examples:
837
+ Examples
809
838
  --------
810
839
  >>> struct1 = StructType().add("f1", StringType(), True).add("f2", StringType(), True, None)
811
840
  >>> struct2 = StructType([StructField("f1", StringType(), True), \\
@@ -820,14 +849,13 @@ class StructType(DataType):
820
849
  >>> struct2 = StructType([StructField("f1", StringType(), True)])
821
850
  >>> struct1 == struct2
822
851
  True
823
- """ # noqa: D205, D415
852
+ """
824
853
  if isinstance(field, StructField):
825
854
  self.fields.append(field)
826
855
  self.names.append(field.name)
827
856
  else:
828
857
  if isinstance(field, str) and data_type is None:
829
- msg = "Must specify DataType if passing name of struct_field to create."
830
- raise ValueError(msg)
858
+ raise ValueError("Must specify DataType if passing name of struct_field to create.")
831
859
  else:
832
860
  data_type_f = data_type
833
861
  self.fields.append(StructField(field, data_type_f, nullable, metadata))
@@ -839,7 +867,7 @@ class StructType(DataType):
839
867
  return self
840
868
 
841
869
  def __iter__(self) -> Iterator[StructField]:
842
- """Iterate the fields."""
870
+ """Iterate the fields"""
843
871
  return iter(self.fields)
844
872
 
845
873
  def __len__(self) -> int:
@@ -852,30 +880,27 @@ class StructType(DataType):
852
880
  for field in self:
853
881
  if field.name == key:
854
882
  return field
855
- msg = f"No StructField named {key}"
856
- raise KeyError(msg)
883
+ raise KeyError("No StructField named {0}".format(key))
857
884
  elif isinstance(key, int):
858
885
  try:
859
886
  return self.fields[key]
860
887
  except IndexError:
861
- msg = "StructType index out of range"
862
- raise IndexError(msg) # noqa: B904
888
+ raise IndexError("StructType index out of range")
863
889
  elif isinstance(key, slice):
864
890
  return StructType(self.fields[key])
865
891
  else:
866
- msg = "StructType keys should be strings, integers or slices"
867
- raise TypeError(msg)
892
+ raise TypeError("StructType keys should be strings, integers or slices")
868
893
 
869
- def simpleString(self) -> str: # noqa: D102
870
- return "struct<{}>".format(",".join(f.simpleString() for f in self))
894
+ def simpleString(self) -> str:
895
+ return "struct<%s>" % (",".join(f.simpleString() for f in self))
871
896
 
872
- def __repr__(self) -> str: # noqa: D105
873
- return "StructType([{}])".format(", ".join(str(field) for field in self))
897
+ def __repr__(self) -> str:
898
+ return "StructType([%s])" % ", ".join(str(field) for field in self)
874
899
 
875
- def __contains__(self, item: str) -> bool: # noqa: D105
900
+ def __contains__(self, item: Any) -> bool:
876
901
  return item in self.names
877
902
 
878
- def extract_types_and_names(self) -> tuple[list[str], list[str]]: # noqa: D102
903
+ def extract_types_and_names(self) -> Tuple[List[str], List[str]]:
879
904
  names = []
880
905
  types = []
881
906
  for f in self.fields:
@@ -883,10 +908,11 @@ class StructType(DataType):
883
908
  names.append(f.name)
884
909
  return (types, names)
885
910
 
886
- def fieldNames(self) -> list[str]:
887
- """Returns all field names in a list.
911
+ def fieldNames(self) -> List[str]:
912
+ """
913
+ Returns all field names in a list.
888
914
 
889
- Examples:
915
+ Examples
890
916
  --------
891
917
  >>> struct = StructType([StructField("f1", StringType(), True)])
892
918
  >>> struct.fieldNames()
@@ -894,11 +920,11 @@ class StructType(DataType):
894
920
  """
895
921
  return list(self.names)
896
922
 
897
- def needConversion(self) -> bool: # noqa: D102
923
+ def needConversion(self) -> bool:
898
924
  # We need convert Row()/namedtuple into tuple()
899
925
  return True
900
926
 
901
- def toInternal(self, obj: tuple) -> tuple: # noqa: D102
927
+ def toInternal(self, obj: Tuple) -> Tuple:
902
928
  if obj is None:
903
929
  return
904
930
 
@@ -918,8 +944,7 @@ class StructType(DataType):
918
944
  for n, f, c in zip(self.names, self.fields, self._needConversion)
919
945
  )
920
946
  else:
921
- msg = f"Unexpected tuple {obj!r} with StructType"
922
- raise ValueError(msg)
947
+ raise ValueError("Unexpected tuple %r with StructType" % obj)
923
948
  else:
924
949
  if isinstance(obj, dict):
925
950
  return tuple(obj.get(n) for n in self.names)
@@ -929,17 +954,16 @@ class StructType(DataType):
929
954
  d = obj.__dict__
930
955
  return tuple(d.get(n) for n in self.names)
931
956
  else:
932
- msg = f"Unexpected tuple {obj!r} with StructType"
933
- raise ValueError(msg)
957
+ raise ValueError("Unexpected tuple %r with StructType" % obj)
934
958
 
935
- def fromInternal(self, obj: tuple) -> "Row": # noqa: D102
959
+ def fromInternal(self, obj: Tuple) -> "Row":
936
960
  if obj is None:
937
961
  return
938
962
  if isinstance(obj, Row):
939
963
  # it's already converted by pickler
940
964
  return obj
941
965
 
942
- values: Union[tuple, list]
966
+ values: Union[Tuple, List]
943
967
  if self._needSerializeAnyField:
944
968
  # Only calling fromInternal function for fields that need conversion
945
969
  values = [f.fromInternal(v) if c else v for f, v, c in zip(self.fields, obj, self._needConversion)]
@@ -949,7 +973,7 @@ class StructType(DataType):
949
973
 
950
974
 
951
975
  class UnionType(DataType):
952
- def __init__(self) -> None:
976
+ def __init__(self):
953
977
  raise ContributionsAcceptedError
954
978
 
955
979
 
@@ -959,7 +983,7 @@ class UserDefinedType(DataType):
959
983
  .. note:: WARN: Spark Internal Use Only
960
984
  """
961
985
 
962
- def __init__(self) -> None:
986
+ def __init__(self):
963
987
  raise ContributionsAcceptedError
964
988
 
965
989
  @classmethod
@@ -968,21 +992,24 @@ class UserDefinedType(DataType):
968
992
 
969
993
  @classmethod
970
994
  def sqlType(cls) -> DataType:
971
- """Underlying SQL storage type for this UDT."""
972
- msg = "UDT must implement sqlType()."
973
- raise NotImplementedError(msg)
995
+ """
996
+ Underlying SQL storage type for this UDT.
997
+ """
998
+ raise NotImplementedError("UDT must implement sqlType().")
974
999
 
975
1000
  @classmethod
976
1001
  def module(cls) -> str:
977
- """The Python module of the UDT."""
978
- msg = "UDT must implement module()."
979
- raise NotImplementedError(msg)
1002
+ """
1003
+ The Python module of the UDT.
1004
+ """
1005
+ raise NotImplementedError("UDT must implement module().")
980
1006
 
981
1007
  @classmethod
982
1008
  def scalaUDT(cls) -> str:
983
- """The class name of the paired Scala UDT (could be '', if there
1009
+ """
1010
+ The class name of the paired Scala UDT (could be '', if there
984
1011
  is no corresponding one).
985
- """ # noqa: D205
1012
+ """
986
1013
  return ""
987
1014
 
988
1015
  def needConversion(self) -> bool:
@@ -990,38 +1017,42 @@ class UserDefinedType(DataType):
990
1017
 
991
1018
  @classmethod
992
1019
  def _cachedSqlType(cls) -> DataType:
993
- """Cache the sqlType() into class, because it's heavily used in `toInternal`."""
1020
+ """
1021
+ Cache the sqlType() into class, because it's heavily used in `toInternal`.
1022
+ """
994
1023
  if not hasattr(cls, "_cached_sql_type"):
995
1024
  cls._cached_sql_type = cls.sqlType() # type: ignore[attr-defined]
996
1025
  return cls._cached_sql_type # type: ignore[attr-defined]
997
1026
 
998
- def toInternal(self, obj: Any) -> Any: # noqa: ANN401
1027
+ def toInternal(self, obj: Any) -> Any:
999
1028
  if obj is not None:
1000
1029
  return self._cachedSqlType().toInternal(self.serialize(obj))
1001
1030
 
1002
- def fromInternal(self, obj: Any) -> Any: # noqa: ANN401
1031
+ def fromInternal(self, obj: Any) -> Any:
1003
1032
  v = self._cachedSqlType().fromInternal(obj)
1004
1033
  if v is not None:
1005
1034
  return self.deserialize(v)
1006
1035
 
1007
- def serialize(self, obj: Any) -> NoReturn: # noqa: ANN401
1008
- """Converts a user-type object into a SQL datum."""
1009
- msg = "UDT must implement toInternal()."
1010
- raise NotImplementedError(msg)
1036
+ def serialize(self, obj: Any) -> Any:
1037
+ """
1038
+ Converts a user-type object into a SQL datum.
1039
+ """
1040
+ raise NotImplementedError("UDT must implement toInternal().")
1011
1041
 
1012
- def deserialize(self, datum: Any) -> NoReturn: # noqa: ANN401
1013
- """Converts a SQL datum into a user-type object."""
1014
- msg = "UDT must implement fromInternal()."
1015
- raise NotImplementedError(msg)
1042
+ def deserialize(self, datum: Any) -> Any:
1043
+ """
1044
+ Converts a SQL datum into a user-type object.
1045
+ """
1046
+ raise NotImplementedError("UDT must implement fromInternal().")
1016
1047
 
1017
1048
  def simpleString(self) -> str:
1018
1049
  return "udt"
1019
1050
 
1020
- def __eq__(self, other: object) -> bool:
1021
- return type(self) is type(other)
1051
+ def __eq__(self, other: Any) -> bool:
1052
+ return type(self) == type(other)
1022
1053
 
1023
1054
 
1024
- _atomic_types: list[type[DataType]] = [
1055
+ _atomic_types: List[Type[DataType]] = [
1025
1056
  StringType,
1026
1057
  BinaryType,
1027
1058
  BooleanType,
@@ -1037,28 +1068,32 @@ _atomic_types: list[type[DataType]] = [
1037
1068
  TimestampNTZType,
1038
1069
  NullType,
1039
1070
  ]
1040
- _all_atomic_types: dict[str, type[DataType]] = {t.typeName(): t for t in _atomic_types}
1071
+ _all_atomic_types: Dict[str, Type[DataType]] = dict((t.typeName(), t) for t in _atomic_types)
1041
1072
 
1042
- _complex_types: list[type[Union[ArrayType, MapType, StructType]]] = [
1073
+ _complex_types: List[Type[Union[ArrayType, MapType, StructType]]] = [
1043
1074
  ArrayType,
1044
1075
  MapType,
1045
1076
  StructType,
1046
1077
  ]
1047
- _all_complex_types: dict[str, type[Union[ArrayType, MapType, StructType]]] = {v.typeName(): v for v in _complex_types}
1078
+ _all_complex_types: Dict[str, Type[Union[ArrayType, MapType, StructType]]] = dict(
1079
+ (v.typeName(), v) for v in _complex_types
1080
+ )
1048
1081
 
1049
1082
 
1050
1083
  _FIXED_DECIMAL = re.compile(r"decimal\(\s*(\d+)\s*,\s*(-?\d+)\s*\)")
1051
1084
  _INTERVAL_DAYTIME = re.compile(r"interval (day|hour|minute|second)( to (day|hour|minute|second))?")
1052
1085
 
1053
1086
 
1054
- def _create_row(fields: Union["Row", list[str]], values: Union[tuple[Any, ...], list[Any]]) -> "Row":
1087
+ def _create_row(fields: Union["Row", List[str]], values: Union[Tuple[Any, ...], List[Any]]) -> "Row":
1055
1088
  row = Row(*values)
1056
1089
  row.__fields__ = fields
1057
1090
  return row
1058
1091
 
1059
1092
 
1060
1093
  class Row(tuple):
1061
- """A row in :class:`DataFrame`.
1094
+
1095
+ """
1096
+ A row in :class:`DataFrame`.
1062
1097
  The fields in it can be accessed:
1063
1098
 
1064
1099
  * like attributes (``row.key``)
@@ -1075,18 +1110,18 @@ class Row(tuple):
1075
1110
  field names sorted alphabetically and will be ordered in the position as
1076
1111
  entered.
1077
1112
 
1078
- Examples:
1113
+ Examples
1079
1114
  --------
1080
1115
  >>> row = Row(name="Alice", age=11)
1081
1116
  >>> row
1082
1117
  Row(name='Alice', age=11)
1083
- >>> row["name"], row["age"]
1118
+ >>> row['name'], row['age']
1084
1119
  ('Alice', 11)
1085
1120
  >>> row.name, row.age
1086
1121
  ('Alice', 11)
1087
- >>> "name" in row
1122
+ >>> 'name' in row
1088
1123
  True
1089
- >>> "wrong_key" in row
1124
+ >>> 'wrong_key' in row
1090
1125
  False
1091
1126
 
1092
1127
  Row also can be used to create another Row like class, then it
@@ -1095,9 +1130,9 @@ class Row(tuple):
1095
1130
  >>> Person = Row("name", "age")
1096
1131
  >>> Person
1097
1132
  <Row('name', 'age')>
1098
- >>> "name" in Person
1133
+ >>> 'name' in Person
1099
1134
  True
1100
- >>> "wrong_key" in Person
1135
+ >>> 'wrong_key' in Person
1101
1136
  False
1102
1137
  >>> Person("Alice", 11)
1103
1138
  Row(name='Alice', age=11)
@@ -1109,18 +1144,19 @@ class Row(tuple):
1109
1144
  >>> row2 = Row(name="Alice", age=11)
1110
1145
  >>> row1 == row2
1111
1146
  True
1112
- """ # noqa: D205, D415
1147
+ """
1113
1148
 
1114
1149
  @overload
1115
- def __new__(cls, *args: str) -> "Row": ...
1150
+ def __new__(cls, *args: str) -> "Row":
1151
+ ...
1116
1152
 
1117
1153
  @overload
1118
- def __new__(cls, **kwargs: Any) -> "Row": ... # noqa: ANN401
1154
+ def __new__(cls, **kwargs: Any) -> "Row":
1155
+ ...
1119
1156
 
1120
- def __new__(cls, *args: Optional[str], **kwargs: Optional[Any]) -> "Row": # noqa: D102
1157
+ def __new__(cls, *args: Optional[str], **kwargs: Optional[Any]) -> "Row":
1121
1158
  if args and kwargs:
1122
- msg = "Can not use both args and kwargs to create Row"
1123
- raise ValueError(msg)
1159
+ raise ValueError("Can not use both args " "and kwargs to create Row")
1124
1160
  if kwargs:
1125
1161
  # create row objects
1126
1162
  row = tuple.__new__(cls, list(kwargs.values()))
@@ -1130,15 +1166,16 @@ class Row(tuple):
1130
1166
  # create row class or objects
1131
1167
  return tuple.__new__(cls, args)
1132
1168
 
1133
- def asDict(self, recursive: bool = False) -> dict[str, Any]:
1134
- """Return as a dict.
1169
+ def asDict(self, recursive: bool = False) -> Dict[str, Any]:
1170
+ """
1171
+ Return as a dict
1135
1172
 
1136
1173
  Parameters
1137
1174
  ----------
1138
1175
  recursive : bool, optional
1139
1176
  turns the nested Rows to dict (default: False).
1140
1177
 
1141
- Notes:
1178
+ Notes
1142
1179
  -----
1143
1180
  If a row contains duplicate field names, e.g., the rows of a join
1144
1181
  between two :class:`DataFrame` that both have the fields of same names,
@@ -1146,29 +1183,28 @@ class Row(tuple):
1146
1183
  will also return one of the duplicate fields, however returned value might
1147
1184
  be different to ``asDict``.
1148
1185
 
1149
- Examples:
1186
+ Examples
1150
1187
  --------
1151
- >>> Row(name="Alice", age=11).asDict() == {"name": "Alice", "age": 11}
1188
+ >>> Row(name="Alice", age=11).asDict() == {'name': 'Alice', 'age': 11}
1152
1189
  True
1153
- >>> row = Row(key=1, value=Row(name="a", age=2))
1154
- >>> row.asDict() == {"key": 1, "value": Row(name="a", age=2)}
1190
+ >>> row = Row(key=1, value=Row(name='a', age=2))
1191
+ >>> row.asDict() == {'key': 1, 'value': Row(name='a', age=2)}
1155
1192
  True
1156
- >>> row.asDict(True) == {"key": 1, "value": {"name": "a", "age": 2}}
1193
+ >>> row.asDict(True) == {'key': 1, 'value': {'name': 'a', 'age': 2}}
1157
1194
  True
1158
1195
  """
1159
1196
  if not hasattr(self, "__fields__"):
1160
- msg = "Cannot convert a Row class into dict"
1161
- raise TypeError(msg)
1197
+ raise TypeError("Cannot convert a Row class into dict")
1162
1198
 
1163
1199
  if recursive:
1164
1200
 
1165
- def conv(obj: Union[Row, list, dict, object]) -> Union[list, dict, object]:
1201
+ def conv(obj: Any) -> Any:
1166
1202
  if isinstance(obj, Row):
1167
1203
  return obj.asDict(True)
1168
1204
  elif isinstance(obj, list):
1169
1205
  return [conv(o) for o in obj]
1170
1206
  elif isinstance(obj, dict):
1171
- return {k: conv(v) for k, v in obj.items()}
1207
+ return dict((k, conv(v)) for k, v in obj.items())
1172
1208
  else:
1173
1209
  return obj
1174
1210
 
@@ -1176,34 +1212,35 @@ class Row(tuple):
1176
1212
  else:
1177
1213
  return dict(zip(self.__fields__, self))
1178
1214
 
1179
- def __contains__(self, item: Any) -> bool: # noqa: D105, ANN401
1215
+ def __contains__(self, item: Any) -> bool:
1180
1216
  if hasattr(self, "__fields__"):
1181
1217
  return item in self.__fields__
1182
1218
  else:
1183
- return super().__contains__(item)
1219
+ return super(Row, self).__contains__(item)
1184
1220
 
1185
1221
  # let object acts like class
1186
- def __call__(self, *args: Any) -> "Row": # noqa: ANN401
1187
- """Create new Row object."""
1222
+ def __call__(self, *args: Any) -> "Row":
1223
+ """create new Row object"""
1188
1224
  if len(args) > len(self):
1189
- msg = f"Can not create Row with fields {self}, expected {len(self):d} values but got {args}"
1190
- raise ValueError(msg)
1225
+ raise ValueError(
1226
+ "Can not create Row with fields %s, expected %d values " "but got %s" % (self, len(self), args)
1227
+ )
1191
1228
  return _create_row(self, args)
1192
1229
 
1193
- def __getitem__(self, item: Any) -> Any: # noqa: D105, ANN401
1230
+ def __getitem__(self, item: Any) -> Any:
1194
1231
  if isinstance(item, (int, slice)):
1195
- return super().__getitem__(item)
1232
+ return super(Row, self).__getitem__(item)
1196
1233
  try:
1197
1234
  # it will be slow when it has many fields,
1198
1235
  # but this will not be used in normal cases
1199
1236
  idx = self.__fields__.index(item)
1200
- return super().__getitem__(idx)
1237
+ return super(Row, self).__getitem__(idx)
1201
1238
  except IndexError:
1202
- raise KeyError(item) # noqa: B904
1239
+ raise KeyError(item)
1203
1240
  except ValueError:
1204
- raise ValueError(item) # noqa: B904
1241
+ raise ValueError(item)
1205
1242
 
1206
- def __getattr__(self, item: str) -> Any: # noqa: D105, ANN401
1243
+ def __getattr__(self, item: str) -> Any:
1207
1244
  if item.startswith("__"):
1208
1245
  raise AttributeError(item)
1209
1246
  try:
@@ -1212,19 +1249,18 @@ class Row(tuple):
1212
1249
  idx = self.__fields__.index(item)
1213
1250
  return self[idx]
1214
1251
  except IndexError:
1215
- raise AttributeError(item) # noqa: B904
1252
+ raise AttributeError(item)
1216
1253
  except ValueError:
1217
- raise AttributeError(item) # noqa: B904
1254
+ raise AttributeError(item)
1218
1255
 
1219
- def __setattr__(self, key: Any, value: Any) -> None: # noqa: D105, ANN401
1256
+ def __setattr__(self, key: Any, value: Any) -> None:
1220
1257
  if key != "__fields__":
1221
- msg = "Row is read-only"
1222
- raise RuntimeError(msg)
1258
+ raise RuntimeError("Row is read-only")
1223
1259
  self.__dict__[key] = value
1224
1260
 
1225
1261
  def __reduce__(
1226
1262
  self,
1227
- ) -> Union[str, tuple[Any, ...]]:
1263
+ ) -> Union[str, Tuple[Any, ...]]:
1228
1264
  """Returns a tuple so Python knows how to pickle Row."""
1229
1265
  if hasattr(self, "__fields__"):
1230
1266
  return (_create_row, (self.__fields__, tuple(self)))
@@ -1234,6 +1270,6 @@ class Row(tuple):
1234
1270
  def __repr__(self) -> str:
1235
1271
  """Printable representation of Row used in Python REPL."""
1236
1272
  if hasattr(self, "__fields__"):
1237
- return "Row({})".format(", ".join(f"{k}={v!r}" for k, v in zip(self.__fields__, tuple(self))))
1273
+ return "Row(%s)" % ", ".join("%s=%r" % (k, v) for k, v in zip(self.__fields__, tuple(self)))
1238
1274
  else:
1239
- return "<Row({})>".format(", ".join(f"{field!r}" for field in self))
1275
+ return "<Row(%s)>" % ", ".join("%r" % field for field in self)