duckdb 1.5.0.dev86__cp314-cp314-macosx_10_15_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of duckdb might be problematic. Click here for more details.

Files changed (52) hide show
  1. _duckdb-stubs/__init__.pyi +1443 -0
  2. _duckdb-stubs/_func.pyi +46 -0
  3. _duckdb-stubs/_sqltypes.pyi +75 -0
  4. _duckdb.cpython-314-darwin.so +0 -0
  5. adbc_driver_duckdb/__init__.py +50 -0
  6. adbc_driver_duckdb/dbapi.py +115 -0
  7. duckdb/__init__.py +381 -0
  8. duckdb/_dbapi_type_object.py +231 -0
  9. duckdb/_version.py +22 -0
  10. duckdb/bytes_io_wrapper.py +69 -0
  11. duckdb/experimental/__init__.py +3 -0
  12. duckdb/experimental/spark/LICENSE +260 -0
  13. duckdb/experimental/spark/__init__.py +6 -0
  14. duckdb/experimental/spark/_globals.py +77 -0
  15. duckdb/experimental/spark/_typing.py +46 -0
  16. duckdb/experimental/spark/conf.py +46 -0
  17. duckdb/experimental/spark/context.py +180 -0
  18. duckdb/experimental/spark/errors/__init__.py +70 -0
  19. duckdb/experimental/spark/errors/error_classes.py +918 -0
  20. duckdb/experimental/spark/errors/exceptions/__init__.py +16 -0
  21. duckdb/experimental/spark/errors/exceptions/base.py +168 -0
  22. duckdb/experimental/spark/errors/utils.py +111 -0
  23. duckdb/experimental/spark/exception.py +18 -0
  24. duckdb/experimental/spark/sql/__init__.py +7 -0
  25. duckdb/experimental/spark/sql/_typing.py +86 -0
  26. duckdb/experimental/spark/sql/catalog.py +79 -0
  27. duckdb/experimental/spark/sql/column.py +361 -0
  28. duckdb/experimental/spark/sql/conf.py +24 -0
  29. duckdb/experimental/spark/sql/dataframe.py +1389 -0
  30. duckdb/experimental/spark/sql/functions.py +6195 -0
  31. duckdb/experimental/spark/sql/group.py +424 -0
  32. duckdb/experimental/spark/sql/readwriter.py +435 -0
  33. duckdb/experimental/spark/sql/session.py +297 -0
  34. duckdb/experimental/spark/sql/streaming.py +36 -0
  35. duckdb/experimental/spark/sql/type_utils.py +107 -0
  36. duckdb/experimental/spark/sql/types.py +1239 -0
  37. duckdb/experimental/spark/sql/udf.py +37 -0
  38. duckdb/filesystem.py +33 -0
  39. duckdb/func/__init__.py +3 -0
  40. duckdb/functional/__init__.py +13 -0
  41. duckdb/polars_io.py +284 -0
  42. duckdb/py.typed +0 -0
  43. duckdb/query_graph/__main__.py +358 -0
  44. duckdb/sqltypes/__init__.py +63 -0
  45. duckdb/typing/__init__.py +71 -0
  46. duckdb/udf.py +24 -0
  47. duckdb/value/__init__.py +1 -0
  48. duckdb/value/constant/__init__.py +270 -0
  49. duckdb-1.5.0.dev86.dist-info/METADATA +88 -0
  50. duckdb-1.5.0.dev86.dist-info/RECORD +52 -0
  51. duckdb-1.5.0.dev86.dist-info/WHEEL +6 -0
  52. duckdb-1.5.0.dev86.dist-info/licenses/LICENSE +7 -0
@@ -0,0 +1,1239 @@
1
+ # ruff: noqa: D100
2
+ # This code is based on code from Apache Spark under the license found in the LICENSE
3
+ # file located in the 'spark' folder.
4
+
5
+ import calendar
6
+ import datetime
7
+ import math
8
+ import re
9
+ import time
10
+ from builtins import tuple
11
+ from collections.abc import Iterator, Mapping
12
+ from types import MappingProxyType
13
+ from typing import (
14
+ Any,
15
+ ClassVar,
16
+ NoReturn,
17
+ Optional,
18
+ TypeVar,
19
+ Union,
20
+ cast,
21
+ overload,
22
+ )
23
+
24
+ import duckdb
25
+ from duckdb.sqltypes import DuckDBPyType
26
+
27
+ from ..exception import ContributionsAcceptedError
28
+
29
+ T = TypeVar("T")
30
+ U = TypeVar("U")
31
+
32
# Public, re-exportable API of this module; mirrors pyspark.sql.types naming
# (kept alphabetically sorted).
__all__ = [
    "ArrayType",
    "BinaryType",
    "BitstringType",
    "BooleanType",
    "ByteType",
    "DataType",
    "DateType",
    "DayTimeIntervalType",
    "DecimalType",
    "DoubleType",
    "FloatType",
    "HugeIntegerType",
    "IntegerType",
    "LongType",
    "MapType",
    "NullType",
    "Row",
    "ShortType",
    "StringType",
    "StructField",
    "StructType",
    "TimeNTZType",
    "TimeType",
    "TimestampMilisecondNTZType",
    "TimestampNTZType",
    "TimestampNanosecondNTZType",
    "TimestampSecondNTZType",
    "TimestampType",
    "UUIDType",
    "UnsignedByteType",
    "UnsignedHugeIntegerType",
    "UnsignedIntegerType",
    "UnsignedLongType",
    "UnsignedShortType",
]
68
+
69
+
70
class DataType:
    """Base class for data types."""

    def __init__(self, duckdb_type: "DuckDBPyType") -> None:
        """Store the underlying DuckDB type handle for this data type."""
        self.duckdb_type = duckdb_type

    def __repr__(self) -> str:
        """Return e.g. ``IntegerType()``."""
        return self.__class__.__name__ + "()"

    def __hash__(self) -> int:
        """Hash on the string form so equal types hash identically."""
        return hash(str(self))

    def __eq__(self, other: object) -> bool:
        """Two types are equal when their classes and attribute dicts match."""
        if not isinstance(other, self.__class__):
            return False
        return self.__dict__ == other.__dict__

    def __ne__(self, other: object) -> bool:
        """Inverse of :meth:`__eq__`."""
        return not self.__eq__(other)

    @classmethod
    def typeName(cls) -> str:
        """Derive the type's name by stripping the ``Type`` suffix from the class name."""
        return cls.__name__[:-4].lower()

    def simpleString(self) -> str:
        """Return the compact SQL-style name; defaults to :meth:`typeName`."""
        return self.typeName()

    def jsonValue(self) -> Union[str, dict[str, Any]]:
        """JSON serialization is not implemented; contributions accepted."""
        raise ContributionsAcceptedError

    def json(self) -> str:
        """JSON serialization is not implemented; contributions accepted."""
        raise ContributionsAcceptedError

    def needConversion(self) -> bool:
        """Does this type needs conversion between Python object and internal SQL object.

        This is used to avoid the unnecessary conversion for ArrayType/MapType/StructType.
        """
        return False

    def toInternal(self, obj: Any) -> Any:
        """Converts a Python object into an internal SQL object."""
        return obj

    def fromInternal(self, obj: Any) -> Any:
        """Converts an internal SQL object into a native Python object."""
        return obj
117
class DataTypeSingleton(type):
    """Metaclass that hands out one shared instance per DataType subclass.

    NOTE: this singleton pattern does not survive pickling — unpickling
    produces a distinct object rather than the cached instance.
    """

    # Cache of the one instance per class using this metaclass.
    _instances: ClassVar[dict[type["DataTypeSingleton"], "DataTypeSingleton"]] = {}

    def __call__(cls: "type[T]") -> "T":  # type: ignore[override]
        cache = cls._instances  # type: ignore[attr-defined]
        instance = cache.get(cls)
        if instance is None:
            # First request for this class: build and memoize the instance.
            instance = super().__call__()  # type: ignore[misc]
            cache[cls] = instance
        return instance
130
class NullType(DataType, metaclass=DataTypeSingleton):
    """Null type.

    The data type representing None, used for the types that cannot be inferred.
    """

    def __init__(self) -> None:
        """Create the singleton NULL-backed instance."""
        sql_type = DuckDBPyType("NULL")
        super().__init__(sql_type)

    @classmethod
    def typeName(cls) -> str:
        """Return the Spark-style name for the null type."""
        return "void"
143
+
144
class AtomicType(DataType):
    """An internal type used to represent everything that is not
    null, UDTs, arrays, structs, and maps.
    """  # noqa: D205
150
class NumericType(AtomicType):
    """Base class for all numeric data types."""
154
class IntegralType(NumericType, metaclass=DataTypeSingleton):
    """Base class for integral (whole-number) data types; one shared instance per subclass."""
158
class FractionalType(NumericType):
    """Base class for fractional (non-integral numeric) data types."""
162
class StringType(AtomicType, metaclass=DataTypeSingleton):
    """String data type."""

    def __init__(self) -> None:
        """Create the singleton VARCHAR-backed instance."""
        sql_type = DuckDBPyType("VARCHAR")
        super().__init__(sql_type)
169
class BitstringType(AtomicType, metaclass=DataTypeSingleton):
    """Bitstring data type."""

    def __init__(self) -> None:
        """Create the singleton BIT-backed instance."""
        sql_type = DuckDBPyType("BIT")
        super().__init__(sql_type)
176
class UUIDType(AtomicType, metaclass=DataTypeSingleton):
    """UUID data type."""

    def __init__(self) -> None:
        """Create the singleton UUID-backed instance."""
        sql_type = DuckDBPyType("UUID")
        super().__init__(sql_type)
183
class BinaryType(AtomicType, metaclass=DataTypeSingleton):
    """Binary (byte array) data type."""

    def __init__(self) -> None:
        """Create the singleton BLOB-backed instance."""
        sql_type = DuckDBPyType("BLOB")
        super().__init__(sql_type)
190
class BooleanType(AtomicType, metaclass=DataTypeSingleton):
    """Boolean data type."""

    def __init__(self) -> None:
        """Create the singleton BOOLEAN-backed instance."""
        sql_type = DuckDBPyType("BOOLEAN")
        super().__init__(sql_type)
197
class DateType(AtomicType, metaclass=DataTypeSingleton):
    """Date (datetime.date) data type.

    Internally represented as a day count relative to the Unix epoch
    (1970-01-01).
    """

    def __init__(self) -> None:
        """Create the singleton DATE-backed instance."""
        super().__init__(DuckDBPyType("DATE"))

    # Ordinal of 1970-01-01; zero point for the internal representation.
    EPOCH_ORDINAL = datetime.datetime(1970, 1, 1).toordinal()

    def needConversion(self) -> bool:
        """Dates need conversion to/from their epoch-relative day count."""
        return True

    def toInternal(self, d: datetime.date) -> Optional[int]:
        """Convert *d* to days since the epoch; None passes through as None."""
        if d is not None:
            return d.toordinal() - self.EPOCH_ORDINAL

    def fromInternal(self, v: int) -> Optional[datetime.date]:
        """Convert days-since-epoch *v* back to a date; None passes through as None."""
        if v is not None:
            return datetime.date.fromordinal(v + self.EPOCH_ORDINAL)
217
class TimestampType(AtomicType, metaclass=DataTypeSingleton):
    """Timestamp (datetime.datetime) data type (timezone-aware, TIMESTAMPTZ)."""

    def __init__(self) -> None:
        """Create the singleton TIMESTAMPTZ-backed instance."""
        super().__init__(DuckDBPyType("TIMESTAMPTZ"))

    @classmethod
    def typeName(cls) -> str:
        """Return the DuckDB-style name for this type."""
        return "timestamptz"

    def needConversion(self) -> bool:
        """Timestamps need conversion to/from epoch microseconds."""
        return True

    def toInternal(self, dt: datetime.datetime) -> Optional[int]:
        """Convert *dt* to microseconds since the epoch; None passes through.

        Aware datetimes are converted via their UTC time tuple; naive ones
        are interpreted in local time (time.mktime).
        """
        if dt is not None:
            seconds = calendar.timegm(dt.utctimetuple()) if dt.tzinfo else time.mktime(dt.timetuple())
            return int(seconds) * 1000000 + dt.microsecond

    def fromInternal(self, ts: int) -> Optional[datetime.datetime]:
        """Convert epoch microseconds to a naive local-time datetime; None passes through."""
        if ts is not None:
            # using int to avoid precision loss in float
            return datetime.datetime.fromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000)
241
class TimestampNTZType(AtomicType, metaclass=DataTypeSingleton):
    """Timestamp (datetime.datetime) data type without timezone information with microsecond precision."""

    def __init__(self) -> None:
        """Create the singleton TIMESTAMP-backed instance."""
        super().__init__(DuckDBPyType("TIMESTAMP"))

    def needConversion(self) -> bool:
        """Timestamps need conversion to/from epoch microseconds."""
        return True

    @classmethod
    def typeName(cls) -> str:
        """Return the DuckDB-style name for this type."""
        return "timestamp"

    def toInternal(self, dt: datetime.datetime) -> Optional[int]:
        """Convert *dt* (interpreted as UTC) to epoch microseconds; None passes through."""
        if dt is not None:
            seconds = calendar.timegm(dt.timetuple())
            return int(seconds) * 1000000 + dt.microsecond

    def fromInternal(self, ts: int) -> Optional[datetime.datetime]:
        """Convert epoch microseconds back to a naive UTC datetime; None passes through."""
        if ts is not None:
            # datetime.utcfromtimestamp is deprecated since Python 3.12: build an
            # aware UTC datetime and drop tzinfo instead (same resulting value).
            # Integer division avoids float precision loss.
            dt = datetime.datetime.fromtimestamp(ts // 1000000, tz=datetime.timezone.utc)
            return dt.replace(tzinfo=None, microsecond=ts % 1000000)
265
class TimestampSecondNTZType(AtomicType, metaclass=DataTypeSingleton):
    """Timestamp (datetime.datetime) data type without timezone information with second precision."""

    def __init__(self) -> None:
        """Create the singleton TIMESTAMP_S-backed instance."""
        sql_type = DuckDBPyType("TIMESTAMP_S")
        super().__init__(sql_type)

    def needConversion(self) -> bool:
        """Conversion is declared but not yet implemented (see below)."""
        return True

    @classmethod
    def typeName(cls) -> str:
        """Return the DuckDB-style name for this type."""
        return "timestamp_s"

    def toInternal(self, dt: datetime.datetime) -> int:
        """Not implemented; contributions accepted."""
        raise ContributionsAcceptedError

    def fromInternal(self, ts: int) -> datetime.datetime:
        """Not implemented; contributions accepted."""
        raise ContributionsAcceptedError
285
class TimestampMilisecondNTZType(AtomicType, metaclass=DataTypeSingleton):
    """Timestamp (datetime.datetime) data type without timezone information with milisecond precision."""

    def __init__(self) -> None:
        """Create the singleton TIMESTAMP_MS-backed instance."""
        sql_type = DuckDBPyType("TIMESTAMP_MS")
        super().__init__(sql_type)

    def needConversion(self) -> bool:
        """Conversion is declared but not yet implemented (see below)."""
        return True

    @classmethod
    def typeName(cls) -> str:
        """Return the DuckDB-style name for this type."""
        return "timestamp_ms"

    def toInternal(self, dt: datetime.datetime) -> int:
        """Not implemented; contributions accepted."""
        raise ContributionsAcceptedError

    def fromInternal(self, ts: int) -> datetime.datetime:
        """Not implemented; contributions accepted."""
        raise ContributionsAcceptedError
305
class TimestampNanosecondNTZType(AtomicType, metaclass=DataTypeSingleton):
    """Timestamp (datetime.datetime) data type without timezone information with nanosecond precision."""

    def __init__(self) -> None:
        """Create the singleton TIMESTAMP_NS-backed instance."""
        sql_type = DuckDBPyType("TIMESTAMP_NS")
        super().__init__(sql_type)

    def needConversion(self) -> bool:
        """Conversion is declared but not yet implemented (see below)."""
        return True

    @classmethod
    def typeName(cls) -> str:
        """Return the DuckDB-style name for this type."""
        return "timestamp_ns"

    def toInternal(self, dt: datetime.datetime) -> int:
        """Not implemented; contributions accepted."""
        raise ContributionsAcceptedError

    def fromInternal(self, ts: int) -> datetime.datetime:
        """Not implemented; contributions accepted."""
        raise ContributionsAcceptedError
325
class DecimalType(FractionalType):
    """Decimal (decimal.Decimal) data type.

    The DecimalType must have fixed precision (the maximum total number of digits)
    and scale (the number of digits on the right of dot). For example, (5, 2) can
    support the value from [-999.99 to 999.99].

    The precision can be up to 38, the scale must be less or equal to precision.

    When creating a DecimalType, the default precision and scale is (10, 0). When inferring
    schema from decimal.Decimal objects, it will be DecimalType(38, 18).

    Parameters
    ----------
    precision : int, optional
        the maximum (i.e. total) number of digits (default: 10)
    scale : int, optional
        the number of digits on right side of dot. (default: 0)
    """

    def __init__(self, precision: int = 10, scale: int = 0) -> None:
        """Create a DECIMAL(precision, scale)-backed instance."""
        super().__init__(duckdb.decimal_type(precision, scale))
        self.precision = precision
        self.scale = scale
        # Public flag: explicit precision/scale information is present.
        self.hasPrecisionInfo = True

    def simpleString(self) -> str:
        """Return e.g. ``decimal(10,0)``."""
        return "decimal({:d},{:d})".format(int(self.precision), int(self.scale))

    def __repr__(self) -> str:
        """Return e.g. ``DecimalType(10,0)``."""
        return "DecimalType({:d},{:d})".format(int(self.precision), int(self.scale))
358
class DoubleType(FractionalType, metaclass=DataTypeSingleton):
    """Double data type, representing double precision floats."""

    def __init__(self) -> None:
        """Create the singleton DOUBLE-backed instance."""
        sql_type = DuckDBPyType("DOUBLE")
        super().__init__(sql_type)
365
class FloatType(FractionalType, metaclass=DataTypeSingleton):
    """Float data type, representing single precision floats."""

    def __init__(self) -> None:
        """Create the singleton FLOAT-backed instance."""
        sql_type = DuckDBPyType("FLOAT")
        super().__init__(sql_type)
372
class ByteType(IntegralType):
    """Byte data type, i.e. a signed integer in a single byte."""

    def __init__(self) -> None:
        """Create a TINYINT-backed instance."""
        sql_type = DuckDBPyType("TINYINT")
        super().__init__(sql_type)

    def simpleString(self) -> str:
        """Return the DuckDB-style short name."""
        return "tinyint"
382
class UnsignedByteType(IntegralType):
    """Unsigned byte data type, i.e. a unsigned integer in a single byte."""

    def __init__(self) -> None:
        """Create a UTINYINT-backed instance."""
        sql_type = DuckDBPyType("UTINYINT")
        super().__init__(sql_type)

    def simpleString(self) -> str:
        """Return the DuckDB-style short name."""
        return "utinyint"
392
class ShortType(IntegralType):
    """Short data type, i.e. a signed 16-bit integer."""

    def __init__(self) -> None:
        """Create a SMALLINT-backed instance."""
        sql_type = DuckDBPyType("SMALLINT")
        super().__init__(sql_type)

    def simpleString(self) -> str:
        """Return the DuckDB-style short name."""
        return "smallint"
402
class UnsignedShortType(IntegralType):
    """Unsigned short data type, i.e. a unsigned 16-bit integer."""

    def __init__(self) -> None:
        """Create a USMALLINT-backed instance."""
        sql_type = DuckDBPyType("USMALLINT")
        super().__init__(sql_type)

    def simpleString(self) -> str:
        """Return the DuckDB-style short name."""
        return "usmallint"
412
class IntegerType(IntegralType):
    """Int data type, i.e. a signed 32-bit integer."""

    def __init__(self) -> None:
        """Create an INTEGER-backed instance."""
        sql_type = DuckDBPyType("INTEGER")
        super().__init__(sql_type)

    def simpleString(self) -> str:
        """Return the DuckDB-style short name."""
        return "integer"
422
class UnsignedIntegerType(IntegralType):
    """Unsigned int data type, i.e. a unsigned 32-bit integer."""

    def __init__(self) -> None:
        """Create a UINTEGER-backed instance."""
        sql_type = DuckDBPyType("UINTEGER")
        super().__init__(sql_type)

    def simpleString(self) -> str:
        """Return the DuckDB-style short name."""
        return "uinteger"
432
class LongType(IntegralType):
    """Long data type, i.e. a signed 64-bit integer.

    If the values are beyond the range of [-9223372036854775808, 9223372036854775807],
    please use :class:`DecimalType`.
    """

    def __init__(self) -> None:
        """Create a BIGINT-backed instance."""
        sql_type = DuckDBPyType("BIGINT")
        super().__init__(sql_type)

    def simpleString(self) -> str:
        """Return the DuckDB-style short name."""
        return "bigint"
446
class UnsignedLongType(IntegralType):
    """Unsigned long data type, i.e. a unsigned 64-bit integer.

    If the values are beyond the range of [0, 18446744073709551615],
    please use :class:`HugeIntegerType`.
    """

    def __init__(self) -> None:
        """Create a UBIGINT-backed instance."""
        sql_type = DuckDBPyType("UBIGINT")
        super().__init__(sql_type)

    def simpleString(self) -> str:
        """Return the DuckDB-style short name."""
        return "ubigint"
460
class HugeIntegerType(IntegralType):
    """Huge integer data type, i.e. a signed 128-bit integer.

    If the values are beyond the range of [-170141183460469231731687303715884105728,
    170141183460469231731687303715884105727], please use :class:`DecimalType`.
    """

    def __init__(self) -> None:
        """Create a HUGEINT-backed instance."""
        sql_type = DuckDBPyType("HUGEINT")
        super().__init__(sql_type)

    def simpleString(self) -> str:
        """Return the DuckDB-style short name."""
        return "hugeint"
474
class UnsignedHugeIntegerType(IntegralType):
    """Unsigned huge integer data type, i.e. a unsigned 128-bit integer.

    If the values are beyond the range of [0, 340282366920938463463374607431768211455],
    please use :class:`DecimalType`.
    """

    def __init__(self) -> None:
        """Create a UHUGEINT-backed instance."""
        sql_type = DuckDBPyType("UHUGEINT")
        super().__init__(sql_type)

    def simpleString(self) -> str:
        """Return the DuckDB-style short name."""
        return "uhugeint"
488
class TimeType(IntegralType):
    """Time (datetime.time) data type (with timezone, TIMETZ).

    NOTE(review): inherits IntegralType rather than AtomicType — kept as in
    the original hierarchy; confirm this is intentional.
    """

    def __init__(self) -> None:
        """Create a TIMETZ-backed instance."""
        sql_type = DuckDBPyType("TIMETZ")
        super().__init__(sql_type)

    def simpleString(self) -> str:
        """Return the DuckDB-style short name."""
        return "timetz"
498
class TimeNTZType(IntegralType):
    """Time (datetime.time) data type without timezone information.

    NOTE(review): inherits IntegralType rather than AtomicType — kept as in
    the original hierarchy; confirm this is intentional.
    """

    def __init__(self) -> None:
        """Create a TIME-backed instance."""
        sql_type = DuckDBPyType("TIME")
        super().__init__(sql_type)

    def simpleString(self) -> str:
        """Return the DuckDB-style short name."""
        return "time"
508
class DayTimeIntervalType(AtomicType):
    """DayTimeIntervalType (datetime.timedelta)."""

    # Field identifiers for the interval bounds.
    DAY = 0
    HOUR = 1
    MINUTE = 2
    SECOND = 3

    # Maps field identifier -> field name. (Fix: the original annotation had
    # the key and value types swapped — the dict literally maps int to str.)
    _fields: Mapping[int, str] = MappingProxyType(
        {
            DAY: "day",
            HOUR: "hour",
            MINUTE: "minute",
            SECOND: "second",
        }
    )

    # Maps field name -> field identifier (inverse of _fields; annotation
    # likewise corrected to str -> int).
    _inverted_fields: Mapping[str, int] = MappingProxyType(dict(zip(_fields.values(), _fields.keys())))

    def __init__(self, startField: Optional[int] = None, endField: Optional[int] = None) -> None:
        """Create an INTERVAL-backed type spanning startField..endField.

        With no arguments the full DAY..SECOND span is used; a lone
        startField implies endField == startField.

        Raises:
            RuntimeError: if either field is not a valid field identifier.
        """
        super().__init__(DuckDBPyType("INTERVAL"))
        if startField is None and endField is None:
            # Default matched to scala side.
            startField = DayTimeIntervalType.DAY
            endField = DayTimeIntervalType.SECOND
        elif startField is not None and endField is None:
            endField = startField

        fields = DayTimeIntervalType._fields
        if startField not in fields or endField not in fields:
            msg = f"interval {startField} to {endField} is invalid"
            raise RuntimeError(msg)
        self.startField = cast("int", startField)
        self.endField = cast("int", endField)

    def _str_repr(self) -> str:
        """Return e.g. 'interval day' or 'interval day to second'."""
        fields = DayTimeIntervalType._fields
        start_field_name = fields[self.startField]
        end_field_name = fields[self.endField]
        if start_field_name == end_field_name:
            return f"interval {start_field_name}"
        else:
            return f"interval {start_field_name} to {end_field_name}"

    # simpleString intentionally shares the implementation of _str_repr.
    simpleString = _str_repr

    def __repr__(self) -> str:
        """Return e.g. 'DayTimeIntervalType(0, 3)'."""
        return f"{type(self).__name__}({int(self.startField):d}, {int(self.endField):d})"

    def needConversion(self) -> bool:
        """Intervals need conversion to/from total microseconds."""
        return True

    def toInternal(self, dt: datetime.timedelta) -> Optional[int]:
        """Convert a timedelta to total microseconds; None passes through."""
        if dt is not None:
            return (math.floor(dt.total_seconds()) * 1000000) + dt.microseconds

    def fromInternal(self, micros: int) -> Optional[datetime.timedelta]:
        """Convert total microseconds back to a timedelta; None passes through."""
        if micros is not None:
            return datetime.timedelta(microseconds=micros)
569
class ArrayType(DataType):
    """Array data type.

    Parameters
    ----------
    elementType : :class:`DataType`
        :class:`DataType` of each element in the array.
    containsNull : bool, optional
        whether the array can contain null (None) values.

    Examples:
    --------
    >>> ArrayType(StringType()) == ArrayType(StringType(), True)
    True
    >>> ArrayType(StringType(), False) == ArrayType(StringType())
    False
    """

    def __init__(self, elementType: DataType, containsNull: bool = True) -> None:
        """Create a DuckDB LIST type wrapping *elementType*."""
        # Validate BEFORE use: the original asserted only after dereferencing
        # elementType.duckdb_type, so a non-DataType argument raised an
        # unhelpful AttributeError instead of this assertion message.
        assert isinstance(elementType, DataType), f"elementType {elementType} should be an instance of {DataType}"
        super().__init__(duckdb.list_type(elementType.duckdb_type))
        self.elementType = elementType
        self.containsNull = containsNull

    def simpleString(self) -> str:
        """Return e.g. ``array<string>``."""
        return f"array<{self.elementType.simpleString()}>"

    def __repr__(self) -> str:
        """Return e.g. ``ArrayType(StringType(), True)``."""
        return f"ArrayType({self.elementType}, {self.containsNull!s})"

    def needConversion(self) -> bool:
        """Arrays need conversion iff their element type does."""
        return self.elementType.needConversion()

    def toInternal(self, obj: list[Optional[T]]) -> list[Optional[T]]:
        """Convert every element via the element type; falsy input passes through."""
        if not self.needConversion():
            return obj
        return obj and [self.elementType.toInternal(v) for v in obj]

    def fromInternal(self, obj: list[Optional[T]]) -> list[Optional[T]]:
        """Convert every element back via the element type; falsy input passes through."""
        if not self.needConversion():
            return obj
        return obj and [self.elementType.fromInternal(v) for v in obj]
613
class MapType(DataType):
    """Map data type.

    Parameters
    ----------
    keyType : :class:`DataType`
        :class:`DataType` of the keys in the map.
    valueType : :class:`DataType`
        :class:`DataType` of the values in the map.
    valueContainsNull : bool, optional
        indicates whether values can contain null (None) values.

    Notes:
    -----
    Keys in a map data type are not allowed to be null (None).

    Examples:
    --------
    >>> (MapType(StringType(), IntegerType()) == MapType(StringType(), IntegerType(), True))
    True
    >>> (MapType(StringType(), IntegerType(), False) == MapType(StringType(), FloatType()))
    False
    """

    def __init__(self, keyType: DataType, valueType: DataType, valueContainsNull: bool = True) -> None:
        """Create a DuckDB MAP type wrapping *keyType* and *valueType*."""
        # Validate BEFORE use: the original asserted only after dereferencing
        # .duckdb_type on the arguments, so a non-DataType argument raised an
        # unhelpful AttributeError instead of these assertion messages.
        assert isinstance(keyType, DataType), f"keyType {keyType} should be an instance of {DataType}"
        assert isinstance(valueType, DataType), f"valueType {valueType} should be an instance of {DataType}"
        super().__init__(duckdb.map_type(keyType.duckdb_type, valueType.duckdb_type))
        self.keyType = keyType
        self.valueType = valueType
        self.valueContainsNull = valueContainsNull

    def simpleString(self) -> str:
        """Return e.g. ``map<string,integer>``."""
        return f"map<{self.keyType.simpleString()},{self.valueType.simpleString()}>"

    def __repr__(self) -> str:
        """Return e.g. ``MapType(StringType(), IntegerType(), True)``."""
        return f"MapType({self.keyType}, {self.valueType}, {self.valueContainsNull!s})"

    def needConversion(self) -> bool:
        """Maps need conversion iff either the key or the value type does."""
        return self.keyType.needConversion() or self.valueType.needConversion()

    def toInternal(self, obj: dict[T, Optional[U]]) -> dict[T, Optional[U]]:
        """Convert keys and values via their types; falsy input passes through."""
        if not self.needConversion():
            return obj
        return obj and {self.keyType.toInternal(k): self.valueType.toInternal(v) for k, v in obj.items()}

    def fromInternal(self, obj: dict[T, Optional[U]]) -> dict[T, Optional[U]]:
        """Convert keys and values back via their types; falsy input passes through."""
        if not self.needConversion():
            return obj
        return obj and {self.keyType.fromInternal(k): self.valueType.fromInternal(v) for k, v in obj.items()}
665
class StructField(DataType):
    """A field in :class:`StructType`.

    Parameters
    ----------
    name : str
        name of the field.
    dataType : :class:`DataType`
        :class:`DataType` of the field.
    nullable : bool, optional
        whether the field can be null (None) or not.
    metadata : dict, optional
        a dict from string to simple type that can be toInternald to JSON automatically

    Examples:
    --------
    >>> (StructField("f1", StringType(), True) == StructField("f1", StringType(), True))
    True
    >>> (StructField("f1", StringType(), True) == StructField("f2", StringType(), True))
    False
    """

    def __init__(
        self,
        name: str,
        dataType: DataType,
        nullable: bool = True,
        metadata: Optional[dict[str, Any]] = None,
    ) -> None:
        """Create a named, typed field.

        Raises:
            AssertionError: if *dataType* is not a DataType or *name* is not a str.
        """
        # Validate BEFORE use: the original asserted only after dereferencing
        # dataType.duckdb_type, so a non-DataType argument raised an unhelpful
        # AttributeError instead of this assertion message.
        assert isinstance(dataType, DataType), f"dataType {dataType} should be an instance of {DataType}"
        assert isinstance(name, str), f"field name {name} should be a string"
        super().__init__(dataType.duckdb_type)
        self.name = name
        self.dataType = dataType
        self.nullable = nullable
        self.metadata = metadata or {}

    def simpleString(self) -> str:
        """Return e.g. ``f1:string``."""
        return f"{self.name}:{self.dataType.simpleString()}"

    def __repr__(self) -> str:
        """Return e.g. ``StructField('f1', StringType(), True)``."""
        return f"StructField('{self.name}', {self.dataType}, {self.nullable!s})"

    def needConversion(self) -> bool:
        """A field needs conversion iff its data type does."""
        return self.dataType.needConversion()

    def toInternal(self, obj: T) -> T:
        """Delegate conversion to the field's data type."""
        return self.dataType.toInternal(obj)

    def fromInternal(self, obj: T) -> T:
        """Delegate conversion to the field's data type."""
        return self.dataType.fromInternal(obj)

    def typeName(self) -> str:  # type: ignore[override]
        """Unsupported on fields — query the field's type instead."""
        msg = "StructField does not have typeName. Use typeName on its type explicitly instead."
        raise TypeError(msg)
721
+
722
+ class StructType(DataType):
723
+ r"""Struct type, consisting of a list of :class:`StructField`.
724
+
725
+ This is the data type representing a :class:`Row`.
726
+
727
+ Iterating a :class:`StructType` will iterate over its :class:`StructField`\\s.
728
+ A contained :class:`StructField` can be accessed by its name or position.
729
+
730
+ Examples:
731
+ --------
732
+ >>> struct1 = StructType([StructField("f1", StringType(), True)])
733
+ >>> struct1["f1"]
734
+ StructField('f1', StringType(), True)
735
+ >>> struct1[0]
736
+ StructField('f1', StringType(), True)
737
+
738
+ >>> struct1 = StructType([StructField("f1", StringType(), True)])
739
+ >>> struct2 = StructType([StructField("f1", StringType(), True)])
740
+ >>> struct1 == struct2
741
+ True
742
+ >>> struct1 = StructType([StructField("f1", StringType(), True)])
743
+ >>> struct2 = StructType(
744
+ ... [StructField("f1", StringType(), True), StructField("f2", IntegerType(), False)]
745
+ ... )
746
+ >>> struct1 == struct2
747
+ False
748
+ """
749
+
750
+ def _update_internal_duckdb_type(self) -> None:
751
+ self.duckdb_type = duckdb.struct_type(dict(zip(self.names, [x.duckdb_type for x in self.fields])))
752
+
753
+ def __init__(self, fields: Optional[list[StructField]] = None) -> None: # noqa: D107
754
+ if not fields:
755
+ self.fields = []
756
+ self.names = []
757
+ else:
758
+ self.fields = fields
759
+ self.names = [f.name for f in fields]
760
+ assert all(isinstance(f, StructField) for f in fields), "fields should be a list of StructField"
761
+ # Precalculated list of fields that need conversion with fromInternal/toInternal functions
762
+ self._needConversion = [f.needConversion() for f in self]
763
+ self._needSerializeAnyField = any(self._needConversion)
764
+ super().__init__(duckdb.struct_type(dict(zip(self.names, [x.duckdb_type for x in self.fields]))))
765
+
766
+ @overload
767
+ def add(
768
+ self,
769
+ field: str,
770
+ data_type: Union[str, DataType],
771
+ nullable: bool = True,
772
+ metadata: Optional[dict[str, Any]] = None,
773
+ ) -> "StructType": ...
774
+
775
+ @overload
776
+ def add(self, field: StructField) -> "StructType": ...
777
+
778
+ def add(
779
+ self,
780
+ field: Union[str, StructField],
781
+ data_type: Optional[Union[str, DataType]] = None,
782
+ nullable: bool = True,
783
+ metadata: Optional[dict[str, Any]] = None,
784
+ ) -> "StructType":
785
+ r"""Construct a :class:`StructType` by adding new elements to it, to define the schema.
786
+ The method accepts either:
787
+
788
+ a) A single parameter which is a :class:`StructField` object.
789
+ b) Between 2 and 4 parameters as (name, data_type, nullable (optional),
790
+ metadata(optional). The data_type parameter may be either a String or a
791
+ :class:`DataType` object.
792
+
793
+ Parameters
794
+ ----------
795
+ field : str or :class:`StructField`
796
+ Either the name of the field or a :class:`StructField` object
797
+ data_type : :class:`DataType`, optional
798
+ If present, the DataType of the :class:`StructField` to create
799
+ nullable : bool, optional
800
+ Whether the field to add should be nullable (default True)
801
+ metadata : dict, optional
802
+ Any additional metadata (default None)
803
+
804
+ Returns:
805
+ -------
806
+ :class:`StructType`
807
+
808
+ Examples:
809
+ --------
810
+ >>> struct1 = StructType().add("f1", StringType(), True).add("f2", StringType(), True, None)
811
+ >>> struct2 = StructType([StructField("f1", StringType(), True), \\
812
+ ... StructField("f2", StringType(), True, None)])
813
+ >>> struct1 == struct2
814
+ True
815
+ >>> struct1 = StructType().add(StructField("f1", StringType(), True))
816
+ >>> struct2 = StructType([StructField("f1", StringType(), True)])
817
+ >>> struct1 == struct2
818
+ True
819
+ >>> struct1 = StructType().add("f1", "string", True)
820
+ >>> struct2 = StructType([StructField("f1", StringType(), True)])
821
+ >>> struct1 == struct2
822
+ True
823
+ """ # noqa: D205, D415
824
+ if isinstance(field, StructField):
825
+ self.fields.append(field)
826
+ self.names.append(field.name)
827
+ else:
828
+ if isinstance(field, str) and data_type is None:
829
+ msg = "Must specify DataType if passing name of struct_field to create."
830
+ raise ValueError(msg)
831
+ else:
832
+ data_type_f = data_type
833
+ self.fields.append(StructField(field, data_type_f, nullable, metadata))
834
+ self.names.append(field)
835
+ # Precalculated list of fields that need conversion with fromInternal/toInternal functions
836
+ self._needConversion = [f.needConversion() for f in self]
837
+ self._needSerializeAnyField = any(self._needConversion)
838
+ self._update_internal_duckdb_type()
839
+ return self
840
+
841
+ def __iter__(self) -> Iterator[StructField]:
842
+ """Iterate the fields."""
843
+ return iter(self.fields)
844
+
845
+ def __len__(self) -> int:
846
+ """Return the number of fields."""
847
+ return len(self.fields)
848
+
849
+ def __getitem__(self, key: Union[str, int]) -> StructField:
850
+ """Access fields by name or slice."""
851
+ if isinstance(key, str):
852
+ for field in self:
853
+ if field.name == key:
854
+ return field
855
+ msg = f"No StructField named {key}"
856
+ raise KeyError(msg)
857
+ elif isinstance(key, int):
858
+ try:
859
+ return self.fields[key]
860
+ except IndexError:
861
+ msg = "StructType index out of range"
862
+ raise IndexError(msg) # noqa: B904
863
+ elif isinstance(key, slice):
864
+ return StructType(self.fields[key])
865
+ else:
866
+ msg = "StructType keys should be strings, integers or slices"
867
+ raise TypeError(msg)
868
+
869
+ def simpleString(self) -> str: # noqa: D102
870
+ return "struct<{}>".format(",".join(f.simpleString() for f in self))
871
+
872
+ def __repr__(self) -> str: # noqa: D105
873
+ return "StructType([{}])".format(", ".join(str(field) for field in self))
874
+
875
+ def __contains__(self, item: str) -> bool: # noqa: D105
876
+ return item in self.names
877
+
878
+ def extract_types_and_names(self) -> tuple[list[str], list[str]]: # noqa: D102
879
+ names = []
880
+ types = []
881
+ for f in self.fields:
882
+ types.append(str(f.dataType.duckdb_type))
883
+ names.append(f.name)
884
+ return (types, names)
885
+
886
+ def fieldNames(self) -> list[str]:
887
+ """Returns all field names in a list.
888
+
889
+ Examples:
890
+ --------
891
+ >>> struct = StructType([StructField("f1", StringType(), True)])
892
+ >>> struct.fieldNames()
893
+ ['f1']
894
+ """
895
+ return list(self.names)
896
+
897
+ def needConversion(self) -> bool: # noqa: D102
898
+ # We need convert Row()/namedtuple into tuple()
899
+ return True
900
+
901
+ def toInternal(self, obj: tuple) -> tuple: # noqa: D102
902
+ if obj is None:
903
+ return
904
+
905
+ if self._needSerializeAnyField:
906
+ # Only calling toInternal function for fields that need conversion
907
+ if isinstance(obj, dict):
908
+ return tuple(
909
+ f.toInternal(obj.get(n)) if c else obj.get(n)
910
+ for n, f, c in zip(self.names, self.fields, self._needConversion)
911
+ )
912
+ elif isinstance(obj, (tuple, list)):
913
+ return tuple(f.toInternal(v) if c else v for f, v, c in zip(self.fields, obj, self._needConversion))
914
+ elif hasattr(obj, "__dict__"):
915
+ d = obj.__dict__
916
+ return tuple(
917
+ f.toInternal(d.get(n)) if c else d.get(n)
918
+ for n, f, c in zip(self.names, self.fields, self._needConversion)
919
+ )
920
+ else:
921
+ msg = f"Unexpected tuple {obj!r} with StructType"
922
+ raise ValueError(msg)
923
+ else:
924
+ if isinstance(obj, dict):
925
+ return tuple(obj.get(n) for n in self.names)
926
+ elif isinstance(obj, (list, tuple)):
927
+ return tuple(obj)
928
+ elif hasattr(obj, "__dict__"):
929
+ d = obj.__dict__
930
+ return tuple(d.get(n) for n in self.names)
931
+ else:
932
+ msg = f"Unexpected tuple {obj!r} with StructType"
933
+ raise ValueError(msg)
934
+
935
+ def fromInternal(self, obj: tuple) -> "Row": # noqa: D102
936
+ if obj is None:
937
+ return
938
+ if isinstance(obj, Row):
939
+ # it's already converted by pickler
940
+ return obj
941
+
942
+ values: Union[tuple, list]
943
+ if self._needSerializeAnyField:
944
+ # Only calling fromInternal function for fields that need conversion
945
+ values = [f.fromInternal(v) if c else v for f, v, c in zip(self.fields, obj, self._needConversion)]
946
+ else:
947
+ values = obj
948
+ return _create_row(self.names, values)
949
+
950
+
951
class UnionType(DataType):
    """DuckDB UNION type; not yet implemented in the Spark-compatible API."""

    def __init__(self) -> None:
        # Constructing a UnionType is unsupported; contributions welcome.
        raise ContributionsAcceptedError
954
+
955
+
956
class UserDefinedType(DataType):
    """User-defined type (UDT).

    .. note:: WARN: Spark Internal Use Only
    """

    def __init__(self) -> None:
        # Direct instantiation is unsupported in this package.
        raise ContributionsAcceptedError

    @classmethod
    def typeName(cls) -> str:
        return cls.__name__.lower()

    @classmethod
    def sqlType(cls) -> DataType:
        """Underlying SQL storage type for this UDT."""
        msg = "UDT must implement sqlType()."
        raise NotImplementedError(msg)

    @classmethod
    def module(cls) -> str:
        """The Python module of the UDT."""
        msg = "UDT must implement module()."
        raise NotImplementedError(msg)

    @classmethod
    def scalaUDT(cls) -> str:
        """Class name of the paired Scala UDT ('' when there is no
        corresponding one).
        """  # noqa: D205
        return ""

    def needConversion(self) -> bool:
        return True

    @classmethod
    def _cachedSqlType(cls) -> DataType:
        """Cache sqlType() on the class, because toInternal hits it per value."""
        if not hasattr(cls, "_cached_sql_type"):
            cls._cached_sql_type = cls.sqlType()  # type: ignore[attr-defined]
        return cls._cached_sql_type  # type: ignore[attr-defined]

    def toInternal(self, obj: Any) -> Any:  # noqa: ANN401
        if obj is None:
            return None
        return self._cachedSqlType().toInternal(self.serialize(obj))

    def fromInternal(self, obj: Any) -> Any:  # noqa: ANN401
        decoded = self._cachedSqlType().fromInternal(obj)
        return self.deserialize(decoded) if decoded is not None else None

    def serialize(self, obj: Any) -> NoReturn:  # noqa: ANN401
        """Converts a user-type object into a SQL datum."""
        msg = "UDT must implement toInternal()."
        raise NotImplementedError(msg)

    def deserialize(self, datum: Any) -> NoReturn:  # noqa: ANN401
        """Converts a SQL datum into a user-type object."""
        msg = "UDT must implement fromInternal()."
        raise NotImplementedError(msg)

    def simpleString(self) -> str:
        return "udt"

    def __eq__(self, other: object) -> bool:
        # NOTE(review): defining __eq__ without __hash__ makes instances
        # unhashable; kept as-is to mirror upstream PySpark behavior.
        return type(self) is type(other)
1022
+
1023
+
1024
# Atomic (non-nested) types, and a lookup table from their simple type name
# (e.g. "string") to the class itself.
_atomic_types: list[type[DataType]] = [
    StringType,
    BinaryType,
    BooleanType,
    DecimalType,
    FloatType,
    DoubleType,
    ByteType,
    ShortType,
    IntegerType,
    LongType,
    DateType,
    TimestampType,
    TimestampNTZType,
    NullType,
]
_all_atomic_types: dict[str, type[DataType]] = {t.typeName(): t for t in _atomic_types}

# Nested container types and the matching name-to-class lookup table.
_complex_types: list[type[Union[ArrayType, MapType, StructType]]] = [
    ArrayType,
    MapType,
    StructType,
]
_all_complex_types: dict[str, type[Union[ArrayType, MapType, StructType]]] = {v.typeName(): v for v in _complex_types}


# Matches e.g. "decimal(10,2)", capturing precision and (possibly negative) scale.
_FIXED_DECIMAL = re.compile(r"decimal\(\s*(\d+)\s*,\s*(-?\d+)\s*\)")
# Matches day-time interval type strings such as "interval day to second".
_INTERVAL_DAYTIME = re.compile(r"interval (day|hour|minute|second)( to (day|hour|minute|second))?")
1052
+
1053
+
1054
def _create_row(fields: Union["Row", list[str]], values: Union[tuple[Any, ...], list[Any]]) -> "Row":
    """Build a :class:`Row` from *values* and attach *fields* as its field names."""
    new_row = Row(*values)
    new_row.__fields__ = fields
    return new_row
1058
+
1059
+
1060
+ class Row(tuple):
1061
+ """A row in :class:`DataFrame`.
1062
+ The fields in it can be accessed:
1063
+
1064
+ * like attributes (``row.key``)
1065
+ * like dictionary values (``row[key]``)
1066
+
1067
+ ``key in row`` will search through row keys.
1068
+
1069
+ Row can be used to create a row object by using named arguments.
1070
+ It is not allowed to omit a named argument to represent that the value is
1071
+ None or missing. This should be explicitly set to None in this case.
1072
+
1073
+ .. versionchanged:: 3.0.0
1074
+ Rows created from named arguments no longer have
1075
+ field names sorted alphabetically and will be ordered in the position as
1076
+ entered.
1077
+
1078
+ Examples:
1079
+ --------
1080
+ >>> row = Row(name="Alice", age=11)
1081
+ >>> row
1082
+ Row(name='Alice', age=11)
1083
+ >>> row["name"], row["age"]
1084
+ ('Alice', 11)
1085
+ >>> row.name, row.age
1086
+ ('Alice', 11)
1087
+ >>> "name" in row
1088
+ True
1089
+ >>> "wrong_key" in row
1090
+ False
1091
+
1092
+ Row also can be used to create another Row like class, then it
1093
+ could be used to create Row objects, such as
1094
+
1095
+ >>> Person = Row("name", "age")
1096
+ >>> Person
1097
+ <Row('name', 'age')>
1098
+ >>> "name" in Person
1099
+ True
1100
+ >>> "wrong_key" in Person
1101
+ False
1102
+ >>> Person("Alice", 11)
1103
+ Row(name='Alice', age=11)
1104
+
1105
+ This form can also be used to create rows as tuple values, i.e. with unnamed
1106
+ fields.
1107
+
1108
+ >>> row1 = Row("Alice", 11)
1109
+ >>> row2 = Row(name="Alice", age=11)
1110
+ >>> row1 == row2
1111
+ True
1112
+ """ # noqa: D205, D415
1113
+
1114
+ @overload
1115
+ def __new__(cls, *args: str) -> "Row": ...
1116
+
1117
+ @overload
1118
+ def __new__(cls, **kwargs: Any) -> "Row": ... # noqa: ANN401
1119
+
1120
+ def __new__(cls, *args: Optional[str], **kwargs: Optional[Any]) -> "Row": # noqa: D102
1121
+ if args and kwargs:
1122
+ msg = "Can not use both args and kwargs to create Row"
1123
+ raise ValueError(msg)
1124
+ if kwargs:
1125
+ # create row objects
1126
+ row = tuple.__new__(cls, list(kwargs.values()))
1127
+ row.__fields__ = list(kwargs.keys())
1128
+ return row
1129
+ else:
1130
+ # create row class or objects
1131
+ return tuple.__new__(cls, args)
1132
+
1133
+ def asDict(self, recursive: bool = False) -> dict[str, Any]:
1134
+ """Return as a dict.
1135
+
1136
+ Parameters
1137
+ ----------
1138
+ recursive : bool, optional
1139
+ turns the nested Rows to dict (default: False).
1140
+
1141
+ Notes:
1142
+ -----
1143
+ If a row contains duplicate field names, e.g., the rows of a join
1144
+ between two :class:`DataFrame` that both have the fields of same names,
1145
+ one of the duplicate fields will be selected by ``asDict``. ``__getitem__``
1146
+ will also return one of the duplicate fields, however returned value might
1147
+ be different to ``asDict``.
1148
+
1149
+ Examples:
1150
+ --------
1151
+ >>> Row(name="Alice", age=11).asDict() == {"name": "Alice", "age": 11}
1152
+ True
1153
+ >>> row = Row(key=1, value=Row(name="a", age=2))
1154
+ >>> row.asDict() == {"key": 1, "value": Row(name="a", age=2)}
1155
+ True
1156
+ >>> row.asDict(True) == {"key": 1, "value": {"name": "a", "age": 2}}
1157
+ True
1158
+ """
1159
+ if not hasattr(self, "__fields__"):
1160
+ msg = "Cannot convert a Row class into dict"
1161
+ raise TypeError(msg)
1162
+
1163
+ if recursive:
1164
+
1165
+ def conv(obj: Union[Row, list, dict, object]) -> Union[list, dict, object]:
1166
+ if isinstance(obj, Row):
1167
+ return obj.asDict(True)
1168
+ elif isinstance(obj, list):
1169
+ return [conv(o) for o in obj]
1170
+ elif isinstance(obj, dict):
1171
+ return {k: conv(v) for k, v in obj.items()}
1172
+ else:
1173
+ return obj
1174
+
1175
+ return dict(zip(self.__fields__, (conv(o) for o in self)))
1176
+ else:
1177
+ return dict(zip(self.__fields__, self))
1178
+
1179
+ def __contains__(self, item: Any) -> bool: # noqa: D105, ANN401
1180
+ if hasattr(self, "__fields__"):
1181
+ return item in self.__fields__
1182
+ else:
1183
+ return super().__contains__(item)
1184
+
1185
+ # let object acts like class
1186
+ def __call__(self, *args: Any) -> "Row": # noqa: ANN401
1187
+ """Create new Row object."""
1188
+ if len(args) > len(self):
1189
+ msg = f"Can not create Row with fields {self}, expected {len(self):d} values but got {args}"
1190
+ raise ValueError(msg)
1191
+ return _create_row(self, args)
1192
+
1193
+ def __getitem__(self, item: Any) -> Any: # noqa: D105, ANN401
1194
+ if isinstance(item, (int, slice)):
1195
+ return super().__getitem__(item)
1196
+ try:
1197
+ # it will be slow when it has many fields,
1198
+ # but this will not be used in normal cases
1199
+ idx = self.__fields__.index(item)
1200
+ return super().__getitem__(idx)
1201
+ except IndexError:
1202
+ raise KeyError(item) # noqa: B904
1203
+ except ValueError:
1204
+ raise ValueError(item) # noqa: B904
1205
+
1206
+ def __getattr__(self, item: str) -> Any: # noqa: D105, ANN401
1207
+ if item.startswith("__"):
1208
+ raise AttributeError(item)
1209
+ try:
1210
+ # it will be slow when it has many fields,
1211
+ # but this will not be used in normal cases
1212
+ idx = self.__fields__.index(item)
1213
+ return self[idx]
1214
+ except IndexError:
1215
+ raise AttributeError(item) # noqa: B904
1216
+ except ValueError:
1217
+ raise AttributeError(item) # noqa: B904
1218
+
1219
+ def __setattr__(self, key: Any, value: Any) -> None: # noqa: D105, ANN401
1220
+ if key != "__fields__":
1221
+ msg = "Row is read-only"
1222
+ raise RuntimeError(msg)
1223
+ self.__dict__[key] = value
1224
+
1225
+ def __reduce__(
1226
+ self,
1227
+ ) -> Union[str, tuple[Any, ...]]:
1228
+ """Returns a tuple so Python knows how to pickle Row."""
1229
+ if hasattr(self, "__fields__"):
1230
+ return (_create_row, (self.__fields__, tuple(self)))
1231
+ else:
1232
+ return tuple.__reduce__(self)
1233
+
1234
+ def __repr__(self) -> str:
1235
+ """Printable representation of Row used in Python REPL."""
1236
+ if hasattr(self, "__fields__"):
1237
+ return "Row({})".format(", ".join(f"{k}={v!r}" for k, v in zip(self.__fields__, tuple(self))))
1238
+ else:
1239
+ return "<Row({})>".format(", ".join(f"{field!r}" for field in self))