duckdb 1.5.0.dev44-cp313-cp313-win_amd64.whl → 1.5.0.dev94-cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (56)
  1. _duckdb-stubs/__init__.pyi +1443 -0
  2. _duckdb-stubs/_func.pyi +46 -0
  3. _duckdb-stubs/_sqltypes.pyi +75 -0
  4. _duckdb.cp313-win_amd64.pyd +0 -0
  5. adbc_driver_duckdb/__init__.py +49 -0
  6. adbc_driver_duckdb/dbapi.py +115 -0
  7. duckdb/__init__.py +341 -435
  8. duckdb/_dbapi_type_object.py +231 -0
  9. duckdb/_version.py +22 -0
  10. duckdb/bytes_io_wrapper.py +12 -9
  11. duckdb/experimental/__init__.py +2 -1
  12. duckdb/experimental/spark/__init__.py +3 -4
  13. duckdb/experimental/spark/_globals.py +8 -8
  14. duckdb/experimental/spark/_typing.py +7 -9
  15. duckdb/experimental/spark/conf.py +16 -15
  16. duckdb/experimental/spark/context.py +60 -44
  17. duckdb/experimental/spark/errors/__init__.py +33 -35
  18. duckdb/experimental/spark/errors/error_classes.py +1 -1
  19. duckdb/experimental/spark/errors/exceptions/__init__.py +1 -1
  20. duckdb/experimental/spark/errors/exceptions/base.py +39 -88
  21. duckdb/experimental/spark/errors/utils.py +11 -16
  22. duckdb/experimental/spark/exception.py +9 -6
  23. duckdb/experimental/spark/sql/__init__.py +5 -5
  24. duckdb/experimental/spark/sql/_typing.py +8 -15
  25. duckdb/experimental/spark/sql/catalog.py +21 -20
  26. duckdb/experimental/spark/sql/column.py +48 -55
  27. duckdb/experimental/spark/sql/conf.py +9 -8
  28. duckdb/experimental/spark/sql/dataframe.py +185 -233
  29. duckdb/experimental/spark/sql/functions.py +1222 -1248
  30. duckdb/experimental/spark/sql/group.py +56 -52
  31. duckdb/experimental/spark/sql/readwriter.py +80 -94
  32. duckdb/experimental/spark/sql/session.py +64 -59
  33. duckdb/experimental/spark/sql/streaming.py +9 -10
  34. duckdb/experimental/spark/sql/type_utils.py +67 -65
  35. duckdb/experimental/spark/sql/types.py +309 -345
  36. duckdb/experimental/spark/sql/udf.py +6 -6
  37. duckdb/filesystem.py +26 -16
  38. duckdb/func/__init__.py +3 -0
  39. duckdb/functional/__init__.py +12 -16
  40. duckdb/polars_io.py +130 -83
  41. duckdb/query_graph/__main__.py +91 -96
  42. duckdb/sqltypes/__init__.py +63 -0
  43. duckdb/typing/__init__.py +18 -8
  44. duckdb/udf.py +10 -5
  45. duckdb/value/__init__.py +1 -0
  46. duckdb/value/constant/__init__.py +62 -60
  47. {duckdb-1.5.0.dev44.dist-info → duckdb-1.5.0.dev94.dist-info}/METADATA +12 -4
  48. duckdb-1.5.0.dev94.dist-info/RECORD +52 -0
  49. duckdb/__init__.pyi +0 -713
  50. duckdb/functional/__init__.pyi +0 -31
  51. duckdb/typing/__init__.pyi +0 -36
  52. duckdb/value/constant/__init__.pyi +0 -115
  53. duckdb-1.5.0.dev44.dist-info/RECORD +0 -47
  54. /duckdb/{value/__init__.pyi → py.typed} +0 -0
  55. {duckdb-1.5.0.dev44.dist-info → duckdb-1.5.0.dev94.dist-info}/WHEEL +0 -0
  56. {duckdb-1.5.0.dev44.dist-info → duckdb-1.5.0.dev94.dist-info}/licenses/LICENSE +0 -0
duckdb/_dbapi_type_object.py ADDED
@@ -0,0 +1,231 @@
+ """DuckDB DB API 2.0 Type Objects Module.
+
+ This module provides DB API 2.0 compliant type objects for DuckDB, allowing applications
+ to check column types returned by queries against standard database API categories.
+
+ Example:
+     >>> import duckdb
+     >>>
+     >>> conn = duckdb.connect()
+     >>> cursor = conn.cursor()
+     >>> cursor.execute("SELECT 'hello' as text_col, 42 as num_col, CURRENT_DATE as date_col")
+     >>>
+     >>> # Check column types using DB API type objects
+     >>> for i, desc in enumerate(cursor.description):
+     >>>     col_name, col_type = desc[0], desc[1]
+     >>>     if col_type == duckdb.STRING:
+     >>>         print(f"{col_name} is a string type")
+     >>>     elif col_type == duckdb.NUMBER:
+     >>>         print(f"{col_name} is a numeric type")
+     >>>     elif col_type == duckdb.DATETIME:
+     >>>         print(f"{col_name} is a date/time type")
+
+ See Also:
+     - PEP 249: https://peps.python.org/pep-0249/
+     - DuckDB Type System: https://duckdb.org/docs/sql/data_types/overview
+ """
+
+ from duckdb import sqltypes
+
+
+ class DBAPITypeObject:
+     """DB API 2.0 type object for categorizing database column types.
+
+     This class implements the type objects defined in PEP 249 (DB API 2.0).
+     It allows checking whether a specific DuckDB type belongs to a broader
+     category like STRING, NUMBER, DATETIME, etc.
+
+     The type object supports equality comparison with DuckDBPyType instances,
+     returning True if the type belongs to this category.
+
+     Args:
+         types: A list of DuckDBPyType instances that belong to this type category.
+
+     Example:
+         >>> string_types = DBAPITypeObject([sqltypes.VARCHAR, sqltypes.CHAR])
+         >>> result = sqltypes.VARCHAR == string_types  # True
+         >>> result = sqltypes.INTEGER == string_types  # False
+
+     Note:
+         This follows the DB API 2.0 specification where type objects are compared
+         using equality operators rather than isinstance() checks.
+     """
+
+     def __init__(self, types: list[sqltypes.DuckDBPyType]) -> None:
+         """Initialize a DB API type object.
+
+         Args:
+             types: List of DuckDB types that belong to this category.
+         """
+         self.types = types
+
+     def __eq__(self, other: object) -> bool:
+         """Check if a DuckDB type belongs to this type category.
+
+         This method implements the DB API 2.0 type checking mechanism.
+         It returns True if the other object is a DuckDBPyType that
+         is contained in this type category.
+
+         Args:
+             other: The object to compare, typically a DuckDBPyType instance.
+
+         Returns:
+             True if other is a DuckDBPyType in this category, False otherwise.
+
+         Example:
+             >>> NUMBER == sqltypes.INTEGER  # True
+             >>> NUMBER == sqltypes.VARCHAR  # False
+         """
+         if isinstance(other, sqltypes.DuckDBPyType):
+             return other in self.types
+         return False
+
+     def __repr__(self) -> str:
+         """Return a string representation of this type object.
+
+         Returns:
+             A string showing the type object and its contained DuckDB types.
+
+         Example:
+             >>> repr(STRING)
+             '<DBAPITypeObject [VARCHAR]>'
+         """
+         return f"<DBAPITypeObject [{','.join(str(x) for x in self.types)}]>"
+
+
+ # Define the standard DB API 2.0 type objects for DuckDB
+
+ STRING = DBAPITypeObject([sqltypes.VARCHAR])
+ """
+ STRING type object for text-based database columns.
+
+ This type object represents all string/text types in DuckDB. Currently includes:
+ - VARCHAR: Variable-length character strings
+
+ Use this to check if a column contains textual data that should be handled
+ as Python strings.
+
+ DB API 2.0 Reference:
+     https://peps.python.org/pep-0249/#string
+
+ Example:
+     >>> cursor.description[0][1] == STRING  # Check if first column is text
+ """
+
+ NUMBER = DBAPITypeObject(
+     [
+         sqltypes.TINYINT,
+         sqltypes.UTINYINT,
+         sqltypes.SMALLINT,
+         sqltypes.USMALLINT,
+         sqltypes.INTEGER,
+         sqltypes.UINTEGER,
+         sqltypes.BIGINT,
+         sqltypes.UBIGINT,
+         sqltypes.HUGEINT,
+         sqltypes.UHUGEINT,
+         sqltypes.DuckDBPyType("BIGNUM"),
+         sqltypes.DuckDBPyType("DECIMAL"),
+         sqltypes.FLOAT,
+         sqltypes.DOUBLE,
+     ]
+ )
+ """
+ NUMBER type object for numeric database columns.
+
+ This type object represents all numeric types in DuckDB, including:
+
+ Integer Types:
+     - TINYINT, UTINYINT: 8-bit signed/unsigned integers
+     - SMALLINT, USMALLINT: 16-bit signed/unsigned integers
+     - INTEGER, UINTEGER: 32-bit signed/unsigned integers
+     - BIGINT, UBIGINT: 64-bit signed/unsigned integers
+     - HUGEINT, UHUGEINT: 128-bit signed/unsigned integers
+
+ Decimal Types:
+     - BIGNUM: Arbitrary precision integers
+     - DECIMAL: Fixed-point decimal numbers
+
+ Floating Point Types:
+     - FLOAT: 32-bit floating point
+     - DOUBLE: 64-bit floating point
+
+ Use this to check if a column contains numeric data that should be handled
+ as Python int, float, or Decimal objects.
+
+ DB API 2.0 Reference:
+     https://peps.python.org/pep-0249/#number
+
+ Example:
+     >>> cursor.description[1][1] == NUMBER  # Check if second column is numeric
+ """
+
+ DATETIME = DBAPITypeObject(
+     [
+         sqltypes.DATE,
+         sqltypes.TIME,
+         sqltypes.TIME_TZ,
+         sqltypes.TIMESTAMP,
+         sqltypes.TIMESTAMP_TZ,
+         sqltypes.TIMESTAMP_NS,
+         sqltypes.TIMESTAMP_MS,
+         sqltypes.TIMESTAMP_S,
+     ]
+ )
+ """
+ DATETIME type object for date and time database columns.
+
+ This type object represents all date/time types in DuckDB, including:
+
+ Date Types:
+     - DATE: Calendar dates (year, month, day)
+
+ Time Types:
+     - TIME: Time of day without timezone
+     - TIME_TZ: Time of day with timezone
+
+ Timestamp Types:
+     - TIMESTAMP: Date and time without timezone (microsecond precision)
+     - TIMESTAMP_TZ: Date and time with timezone
+     - TIMESTAMP_NS: Nanosecond precision timestamps
+     - TIMESTAMP_MS: Millisecond precision timestamps
+     - TIMESTAMP_S: Second precision timestamps
+
+ Use this to check if a column contains temporal data that should be handled
+ as Python datetime, date, or time objects.
+
+ DB API 2.0 Reference:
+     https://peps.python.org/pep-0249/#datetime
+
+ Example:
+     >>> cursor.description[2][1] == DATETIME  # Check if third column is date/time
+ """
+
+ BINARY = DBAPITypeObject([sqltypes.BLOB])
+ """
+ BINARY type object for binary data database columns.
+
+ This type object represents binary data types in DuckDB:
+ - BLOB: Binary Large Objects for storing arbitrary binary data
+
+ Use this to check if a column contains binary data that should be handled
+ as Python bytes objects.
+
+ DB API 2.0 Reference:
+     https://peps.python.org/pep-0249/#binary
+
+ Example:
+     >>> cursor.description[3][1] == BINARY  # Check if fourth column is binary
+ """
+
+ ROWID = None
+ """
+ ROWID type object for row identifier columns.
+
+ DB API 2.0 Reference:
+     https://peps.python.org/pep-0249/#rowid
+
+ Note:
+     This will always be None for DuckDB connections. Applications should not
+     rely on ROWID functionality when using DuckDB.
+ """
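Read together, the module's contract is simple: each entry in cursor.description carries a type code in its second field, and PEP 249 consumers compare that code against STRING, NUMBER, DATETIME, and BINARY with ==. A minimal sketch of that flow, assuming the dev build re-exports the type objects at the top-level duckdb namespace as the module docstring's own example does:

    import duckdb

    conn = duckdb.connect()
    cur = conn.cursor()
    cur.execute("SELECT 'hello' AS t, 42 AS n, CURRENT_DATE AS d")

    # Per PEP 249, description holds 7-item tuples; item 0 is the column
    # name and item 1 is the type code compared against the type objects.
    for name, type_code, *_ in cur.description:
        if type_code == duckdb.STRING:
            print(f"{name}: string")
        elif type_code == duckdb.NUMBER:
            print(f"{name}: numeric")
        elif type_code == duckdb.DATETIME:
            print(f"{name}: date/time")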
duckdb/_version.py ADDED
@@ -0,0 +1,22 @@
+ # ----------------------------------------------------------------------
+ # Version API
+ #
+ # We provide three symbols:
+ # - duckdb.__version__: The version of this package
+ # - duckdb.__duckdb_version__: The version of duckdb that is bundled
+ # - duckdb.version(): A human-readable version string containing both of the above
+ # ----------------------------------------------------------------------
+ from importlib.metadata import version as _dist_version
+
+ import _duckdb
+
+ __version__: str = _dist_version("duckdb")
+ """Version of the DuckDB Python Package."""
+
+ __duckdb_version__: str = _duckdb.__version__
+ """Version of DuckDB that is bundled."""
+
+
+ def version() -> str:
+     """Human-friendly formatted version string of both the distribution package and the bundled DuckDB engine."""
+     return f"{__version__} (with duckdb {_duckdb.__version__})"
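The comment block above names the three public symbols; a quick sketch of how they read at runtime (the version strings in the comments are illustrative):

    import duckdb

    print(duckdb.__version__)         # distribution package version, e.g. "1.5.0.dev94"
    print(duckdb.__duckdb_version__)  # version of the bundled DuckDB engine
    print(duckdb.version())           # "<package version> (with duckdb <engine version>)"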
duckdb/bytes_io_wrapper.py CHANGED
@@ -1,7 +1,5 @@
- from io import StringIO, TextIOBase
- from typing import Union
+ """StringIO buffer wrapper.
 
- """
  BSD 3-Clause License
 
  Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
@@ -35,11 +33,17 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  """
 
+ from io import StringIO, TextIOBase
+ from typing import Any, Union
+
 
  class BytesIOWrapper:
-     # Wrapper that wraps a StringIO buffer and reads bytes from it
-     # Created for compat with pyarrow read_csv
-     def __init__(self, buffer: Union[StringIO, TextIOBase], encoding: str = "utf-8") -> None:
+     """Wrapper that wraps a StringIO buffer and reads bytes from it.
+
+     Created for compat with pyarrow read_csv.
+     """
+
+     def __init__(self, buffer: Union[StringIO, TextIOBase], encoding: str = "utf-8") -> None:  # noqa: D107
          self.buffer = buffer
          self.encoding = encoding
          # Because a character can be represented by more than 1 byte,
@@ -48,10 +52,10 @@ class BytesIOWrapper:
          # overflow to the front of the bytestring the next time reading is performed
          self.overflow = b""
 
-     def __getattr__(self, attr: str):
+     def __getattr__(self, attr: str) -> Any:  # noqa: D105, ANN401
          return getattr(self.buffer, attr)
 
-     def read(self, n: Union[int, None] = -1) -> bytes:
+     def read(self, n: Union[int, None] = -1) -> bytes:  # noqa: D102
          assert self.buffer is not None
          bytestring = self.buffer.read(n).encode(self.encoding)
          # When n=-1/n greater than remaining bytes: Read entire file/rest of file
@@ -63,4 +67,3 @@ class BytesIOWrapper:
          to_return = combined_bytestring[:n]
          self.overflow = combined_bytestring[n:]
          return to_return
-
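The subtlety in read() is that n counts bytes while the wrapped StringIO counts characters, so a multi-byte character split at the boundary is held in self.overflow and prepended to the next read. A minimal sketch of that contract (the import path is assumed from the file's location; the class is an internal compat shim for pyarrow's read_csv):

    from io import StringIO

    from duckdb.bytes_io_wrapper import BytesIOWrapper  # path assumed from the file location

    wrapped = BytesIOWrapper(StringIO("héllo wörld"))  # encoding defaults to utf-8
    first = wrapped.read(3)  # exactly 3 bytes: "hé" encodes to 3, the split "l" overflows
    rest = wrapped.read()    # drains the overflow plus the remaining characters
    assert first + rest == "héllo wörld".encode("utf-8")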
duckdb/experimental/__init__.py CHANGED
@@ -1,2 +1,3 @@
- from . import spark
+ from . import spark  # noqa: D104
+
  __all__ = spark.__all__
duckdb/experimental/spark/__init__.py CHANGED
@@ -1,7 +1,6 @@
- from .sql import SparkSession, DataFrame
- from .conf import SparkConf
+ from .conf import SparkConf  # noqa: D104
  from .context import SparkContext
- from ._globals import _NoValue
  from .exception import ContributionsAcceptedError
+ from .sql import DataFrame, SparkSession
 
- __all__ = ["SparkSession", "DataFrame", "SparkConf", "SparkContext", "ContributionsAcceptedError"]
+ __all__ = ["ContributionsAcceptedError", "DataFrame", "SparkConf", "SparkContext", "SparkSession"]
duckdb/experimental/spark/_globals.py CHANGED
@@ -15,8 +15,7 @@
  # limitations under the License.
  #
 
- """
- Module defining global singleton classes.
+ """Module defining global singleton classes.
 
  This module raises a RuntimeError if an attempt to reload it is made. In that
  way the identities of the classes defined here are fixed and will remain so
@@ -38,7 +37,8 @@ __ALL__ = ["_NoValue"]
  # Disallow reloading this module so as to preserve the identities of the
  # classes defined here.
  if "_is_loaded" in globals():
-     raise RuntimeError("Reloading duckdb.experimental.spark._globals is not allowed")
+     msg = "Reloading duckdb.experimental.spark._globals is not allowed"
+     raise RuntimeError(msg)
  _is_loaded = True
 
 
@@ -54,23 +54,23 @@ class _NoValueType:
 
      __instance = None
 
-     def __new__(cls):
+     def __new__(cls) -> "_NoValueType":
          # ensure that only one instance exists
          if not cls.__instance:
-             cls.__instance = super(_NoValueType, cls).__new__(cls)
+             cls.__instance = super().__new__(cls)
          return cls.__instance
 
      # Make the _NoValue instance falsey
-     def __nonzero__(self):
+     def __nonzero__(self) -> bool:
          return False
 
      __bool__ = __nonzero__
 
      # needed for python 2 to preserve identity through a pickle
-     def __reduce__(self):
+     def __reduce__(self) -> tuple[type, tuple]:
          return (self.__class__, ())
 
-     def __repr__(self):
+     def __repr__(self) -> str:
          return "<no value>"
 
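The reload guard and the __new__ override both serve one goal: a single, identity-stable _NoValue sentinel. A short sketch of the observable behavior, assuming the module still exposes the module-level _NoValue instance named in its __ALL__:

    import pickle

    from duckdb.experimental.spark._globals import _NoValue  # exposure assumed per __ALL__

    assert not _NoValue                    # __bool__/__nonzero__ make the sentinel falsy
    assert repr(_NoValue) == "<no value>"
    # __reduce__ recreates the object via _NoValueType(), and __new__ hands back
    # the one cached instance, so identity survives a pickle round-trip
    assert pickle.loads(pickle.dumps(_NoValue)) is _NoValue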
duckdb/experimental/spark/_typing.py CHANGED
@@ -16,10 +16,11 @@
  # specific language governing permissions and limitations
  # under the License.
 
- from typing import Callable, Iterable, Sized, TypeVar, Union
- from typing_extensions import Literal, Protocol
+ from collections.abc import Iterable, Sized
+ from typing import Callable, TypeVar, Union
 
- from numpy import int32, int64, float32, float64, ndarray
+ from numpy import float32, float64, int32, int64, ndarray
+ from typing_extensions import Literal, Protocol, Self
 
  F = TypeVar("F", bound=Callable)
  T_co = TypeVar("T_co", covariant=True)
@@ -30,17 +31,14 @@ NonUDFType = Literal[0]
 
 
  class SupportsIAdd(Protocol):
-     def __iadd__(self, other: "SupportsIAdd") -> "SupportsIAdd":
-         ...
+     def __iadd__(self, other: "SupportsIAdd") -> Self: ...
 
 
  class SupportsOrdering(Protocol):
-     def __lt__(self, other: "SupportsOrdering") -> bool:
-         ...
+     def __lt__(self, other: "SupportsOrdering") -> bool: ...
 
 
- class SizedIterable(Protocol, Sized, Iterable[T_co]):
-     ...
+ class SizedIterable(Protocol, Sized, Iterable[T_co]): ...
 
 
  S = TypeVar("S", bound=SupportsOrdering)
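The move from a "SupportsIAdd" return annotation to Self is more than stylistic: with Self, a type checker infers the concrete implementing type from +=, instead of widening to the protocol. A sketch with a hypothetical Tally class (not part of the package):

    from typing_extensions import Protocol, Self


    class SupportsIAdd(Protocol):
        def __iadd__(self, other: "SupportsIAdd") -> Self: ...


    class Tally:
        def __init__(self, n: int = 0) -> None:
            self.n = n

        def __iadd__(self, other: "SupportsIAdd") -> Self:
            self.n += getattr(other, "n", 0)  # accept any SupportsIAdd
            return self


    t = Tally(1)
    t += Tally(2)  # a checker keeps t as Tally rather than SupportsIAdd
    assert t.n == 3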
duckdb/experimental/spark/conf.py CHANGED
@@ -1,44 +1,45 @@
- from typing import Optional, List, Tuple
+ from typing import Optional  # noqa: D100
+
  from duckdb.experimental.spark.exception import ContributionsAcceptedError
 
 
- class SparkConf:
-     def __init__(self):
+ class SparkConf:  # noqa: D101
+     def __init__(self) -> None:  # noqa: D107
          raise NotImplementedError
 
-     def contains(self, key: str) -> bool:
+     def contains(self, key: str) -> bool:  # noqa: D102
          raise ContributionsAcceptedError
 
-     def get(self, key: str, defaultValue: Optional[str] = None) -> Optional[str]:
+     def get(self, key: str, defaultValue: Optional[str] = None) -> Optional[str]:  # noqa: D102
          raise ContributionsAcceptedError
 
-     def getAll(self) -> List[Tuple[str, str]]:
+     def getAll(self) -> list[tuple[str, str]]:  # noqa: D102
          raise ContributionsAcceptedError
 
-     def set(self, key: str, value: str) -> "SparkConf":
+     def set(self, key: str, value: str) -> "SparkConf":  # noqa: D102
          raise ContributionsAcceptedError
 
-     def setAll(self, pairs: List[Tuple[str, str]]) -> "SparkConf":
+     def setAll(self, pairs: list[tuple[str, str]]) -> "SparkConf":  # noqa: D102
          raise ContributionsAcceptedError
 
-     def setAppName(self, value: str) -> "SparkConf":
+     def setAppName(self, value: str) -> "SparkConf":  # noqa: D102
          raise ContributionsAcceptedError
 
-     def setExecutorEnv(
-         self, key: Optional[str] = None, value: Optional[str] = None, pairs: Optional[List[Tuple[str, str]]] = None
+     def setExecutorEnv(  # noqa: D102
+         self, key: Optional[str] = None, value: Optional[str] = None, pairs: Optional[list[tuple[str, str]]] = None
      ) -> "SparkConf":
          raise ContributionsAcceptedError
 
-     def setIfMissing(self, key: str, value: str) -> "SparkConf":
+     def setIfMissing(self, key: str, value: str) -> "SparkConf":  # noqa: D102
          raise ContributionsAcceptedError
 
-     def setMaster(self, value: str) -> "SparkConf":
+     def setMaster(self, value: str) -> "SparkConf":  # noqa: D102
          raise ContributionsAcceptedError
 
-     def setSparkHome(self, value: str) -> "SparkConf":
+     def setSparkHome(self, value: str) -> "SparkConf":  # noqa: D102
          raise ContributionsAcceptedError
 
-     def toDebugString(self) -> str:
+     def toDebugString(self) -> str:  # noqa: D102
          raise ContributionsAcceptedError