duckdb 1.4.0.dev2849__cp312-cp312-win_amd64.whl → 1.4.2.dev1__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of duckdb might be problematic. Click here for more details.
- _duckdb-stubs/__init__.pyi +1443 -0
- _duckdb-stubs/_func.pyi +46 -0
- _duckdb-stubs/_sqltypes.pyi +75 -0
- duckdb/duckdb.cp312-win_amd64.pyd → _duckdb.cp312-win_amd64.pyd +0 -0
- adbc_driver_duckdb/__init__.py +11 -8
- adbc_driver_duckdb/dbapi.py +2 -3
- duckdb/__init__.py +343 -388
- duckdb/_dbapi_type_object.py +231 -0
- duckdb/_version.py +22 -0
- duckdb/bytes_io_wrapper.py +12 -9
- duckdb/experimental/__init__.py +2 -1
- duckdb/experimental/spark/__init__.py +3 -4
- duckdb/experimental/spark/_globals.py +8 -8
- duckdb/experimental/spark/_typing.py +7 -9
- duckdb/experimental/spark/conf.py +16 -15
- duckdb/experimental/spark/context.py +60 -44
- duckdb/experimental/spark/errors/__init__.py +33 -35
- duckdb/experimental/spark/errors/error_classes.py +1 -1
- duckdb/experimental/spark/errors/exceptions/__init__.py +1 -1
- duckdb/experimental/spark/errors/exceptions/base.py +39 -88
- duckdb/experimental/spark/errors/utils.py +11 -16
- duckdb/experimental/spark/exception.py +9 -6
- duckdb/experimental/spark/sql/__init__.py +5 -5
- duckdb/experimental/spark/sql/_typing.py +8 -15
- duckdb/experimental/spark/sql/catalog.py +21 -20
- duckdb/experimental/spark/sql/column.py +48 -55
- duckdb/experimental/spark/sql/conf.py +9 -8
- duckdb/experimental/spark/sql/dataframe.py +213 -231
- duckdb/experimental/spark/sql/functions.py +1317 -1220
- duckdb/experimental/spark/sql/group.py +56 -52
- duckdb/experimental/spark/sql/readwriter.py +80 -94
- duckdb/experimental/spark/sql/session.py +64 -59
- duckdb/experimental/spark/sql/streaming.py +9 -10
- duckdb/experimental/spark/sql/type_utils.py +67 -65
- duckdb/experimental/spark/sql/types.py +309 -345
- duckdb/experimental/spark/sql/udf.py +6 -6
- duckdb/filesystem.py +26 -16
- duckdb/func/__init__.py +3 -0
- duckdb/functional/__init__.py +12 -16
- duckdb/polars_io.py +141 -82
- duckdb/query_graph/__main__.py +91 -96
- duckdb/sqltypes/__init__.py +63 -0
- duckdb/typing/__init__.py +18 -8
- duckdb/udf.py +10 -5
- duckdb/value/__init__.py +1 -0
- duckdb/value/{constant.py → constant/__init__.py} +62 -60
- duckdb-1.4.2.dev1.dist-info/METADATA +326 -0
- duckdb-1.4.2.dev1.dist-info/RECORD +52 -0
- {duckdb-1.4.0.dev2849.dist-info → duckdb-1.4.2.dev1.dist-info}/WHEEL +1 -1
- duckdb-1.4.2.dev1.dist-info/licenses/LICENSE +7 -0
- duckdb-1.4.0.dev2849.dist-info/METADATA +0 -47
- duckdb-1.4.0.dev2849.dist-info/RECORD +0 -48
- duckdb-1.4.0.dev2849.dist-info/top_level.txt +0 -3
- duckdb-stubs/__init__.pyi +0 -712
- duckdb-stubs/functional/__init__.pyi +0 -33
- duckdb-stubs/typing/__init__.pyi +0 -37
- duckdb-stubs/value/constant/__init__.pyi +0 -116
- /duckdb-stubs/value/__init__.pyi → /duckdb/py.typed +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
from
|
|
3
|
-
|
|
1
|
+
# ruff: noqa: D101, D104, D105, D107, ANN401
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from duckdb.sqltypes import (
|
|
4
5
|
BIGINT,
|
|
5
6
|
BIT,
|
|
6
7
|
BLOB,
|
|
@@ -9,30 +10,31 @@ from duckdb.typing import (
|
|
|
9
10
|
DOUBLE,
|
|
10
11
|
FLOAT,
|
|
11
12
|
HUGEINT,
|
|
12
|
-
UHUGEINT,
|
|
13
13
|
INTEGER,
|
|
14
14
|
INTERVAL,
|
|
15
15
|
SMALLINT,
|
|
16
16
|
SQLNULL,
|
|
17
17
|
TIME,
|
|
18
|
+
TIME_TZ,
|
|
18
19
|
TIMESTAMP,
|
|
19
20
|
TIMESTAMP_MS,
|
|
20
21
|
TIMESTAMP_NS,
|
|
21
22
|
TIMESTAMP_S,
|
|
22
23
|
TIMESTAMP_TZ,
|
|
23
|
-
TIME_TZ,
|
|
24
24
|
TINYINT,
|
|
25
25
|
UBIGINT,
|
|
26
|
+
UHUGEINT,
|
|
26
27
|
UINTEGER,
|
|
27
28
|
USMALLINT,
|
|
28
29
|
UTINYINT,
|
|
29
30
|
UUID,
|
|
30
31
|
VARCHAR,
|
|
32
|
+
DuckDBPyType,
|
|
31
33
|
)
|
|
32
34
|
|
|
33
35
|
|
|
34
36
|
class Value:
|
|
35
|
-
def __init__(self, object: Any, type: DuckDBPyType):
|
|
37
|
+
def __init__(self, object: Any, type: DuckDBPyType) -> None:
|
|
36
38
|
self.object = object
|
|
37
39
|
self.type = type
|
|
38
40
|
|
|
@@ -44,12 +46,12 @@ class Value:
|
|
|
44
46
|
|
|
45
47
|
|
|
46
48
|
class NullValue(Value):
|
|
47
|
-
def __init__(self):
|
|
49
|
+
def __init__(self) -> None:
|
|
48
50
|
super().__init__(None, SQLNULL)
|
|
49
51
|
|
|
50
52
|
|
|
51
53
|
class BooleanValue(Value):
|
|
52
|
-
def __init__(self, object: Any):
|
|
54
|
+
def __init__(self, object: Any) -> None:
|
|
53
55
|
super().__init__(object, BOOLEAN)
|
|
54
56
|
|
|
55
57
|
|
|
@@ -57,22 +59,22 @@ class BooleanValue(Value):
|
|
|
57
59
|
|
|
58
60
|
|
|
59
61
|
class UnsignedBinaryValue(Value):
|
|
60
|
-
def __init__(self, object: Any):
|
|
62
|
+
def __init__(self, object: Any) -> None:
|
|
61
63
|
super().__init__(object, UTINYINT)
|
|
62
64
|
|
|
63
65
|
|
|
64
66
|
class UnsignedShortValue(Value):
|
|
65
|
-
def __init__(self, object: Any):
|
|
67
|
+
def __init__(self, object: Any) -> None:
|
|
66
68
|
super().__init__(object, USMALLINT)
|
|
67
69
|
|
|
68
70
|
|
|
69
71
|
class UnsignedIntegerValue(Value):
|
|
70
|
-
def __init__(self, object: Any):
|
|
72
|
+
def __init__(self, object: Any) -> None:
|
|
71
73
|
super().__init__(object, UINTEGER)
|
|
72
74
|
|
|
73
75
|
|
|
74
76
|
class UnsignedLongValue(Value):
|
|
75
|
-
def __init__(self, object: Any):
|
|
77
|
+
def __init__(self, object: Any) -> None:
|
|
76
78
|
super().__init__(object, UBIGINT)
|
|
77
79
|
|
|
78
80
|
|
|
@@ -80,32 +82,32 @@ class UnsignedLongValue(Value):
|
|
|
80
82
|
|
|
81
83
|
|
|
82
84
|
class BinaryValue(Value):
|
|
83
|
-
def __init__(self, object: Any):
|
|
85
|
+
def __init__(self, object: Any) -> None:
|
|
84
86
|
super().__init__(object, TINYINT)
|
|
85
87
|
|
|
86
88
|
|
|
87
89
|
class ShortValue(Value):
|
|
88
|
-
def __init__(self, object: Any):
|
|
90
|
+
def __init__(self, object: Any) -> None:
|
|
89
91
|
super().__init__(object, SMALLINT)
|
|
90
92
|
|
|
91
93
|
|
|
92
94
|
class IntegerValue(Value):
|
|
93
|
-
def __init__(self, object: Any):
|
|
95
|
+
def __init__(self, object: Any) -> None:
|
|
94
96
|
super().__init__(object, INTEGER)
|
|
95
97
|
|
|
96
98
|
|
|
97
99
|
class LongValue(Value):
|
|
98
|
-
def __init__(self, object: Any):
|
|
100
|
+
def __init__(self, object: Any) -> None:
|
|
99
101
|
super().__init__(object, BIGINT)
|
|
100
102
|
|
|
101
103
|
|
|
102
104
|
class HugeIntegerValue(Value):
|
|
103
|
-
def __init__(self, object: Any):
|
|
105
|
+
def __init__(self, object: Any) -> None:
|
|
104
106
|
super().__init__(object, HUGEINT)
|
|
105
107
|
|
|
106
108
|
|
|
107
109
|
class UnsignedHugeIntegerValue(Value):
|
|
108
|
-
def __init__(self, object: Any):
|
|
110
|
+
def __init__(self, object: Any) -> None:
|
|
109
111
|
super().__init__(object, UHUGEINT)
|
|
110
112
|
|
|
111
113
|
|
|
@@ -113,17 +115,17 @@ class UnsignedHugeIntegerValue(Value):
|
|
|
113
115
|
|
|
114
116
|
|
|
115
117
|
class FloatValue(Value):
|
|
116
|
-
def __init__(self, object: Any):
|
|
118
|
+
def __init__(self, object: Any) -> None:
|
|
117
119
|
super().__init__(object, FLOAT)
|
|
118
120
|
|
|
119
121
|
|
|
120
122
|
class DoubleValue(Value):
|
|
121
|
-
def __init__(self, object: Any):
|
|
123
|
+
def __init__(self, object: Any) -> None:
|
|
122
124
|
super().__init__(object, DOUBLE)
|
|
123
125
|
|
|
124
126
|
|
|
125
127
|
class DecimalValue(Value):
|
|
126
|
-
def __init__(self, object: Any, width: int, scale: int):
|
|
128
|
+
def __init__(self, object: Any, width: int, scale: int) -> None:
|
|
127
129
|
import duckdb
|
|
128
130
|
|
|
129
131
|
decimal_type = duckdb.decimal_type(width, scale)
|
|
@@ -134,22 +136,22 @@ class DecimalValue(Value):
|
|
|
134
136
|
|
|
135
137
|
|
|
136
138
|
class StringValue(Value):
|
|
137
|
-
def __init__(self, object: Any):
|
|
139
|
+
def __init__(self, object: Any) -> None:
|
|
138
140
|
super().__init__(object, VARCHAR)
|
|
139
141
|
|
|
140
142
|
|
|
141
143
|
class UUIDValue(Value):
|
|
142
|
-
def __init__(self, object: Any):
|
|
144
|
+
def __init__(self, object: Any) -> None:
|
|
143
145
|
super().__init__(object, UUID)
|
|
144
146
|
|
|
145
147
|
|
|
146
148
|
class BitValue(Value):
|
|
147
|
-
def __init__(self, object: Any):
|
|
149
|
+
def __init__(self, object: Any) -> None:
|
|
148
150
|
super().__init__(object, BIT)
|
|
149
151
|
|
|
150
152
|
|
|
151
153
|
class BlobValue(Value):
|
|
152
|
-
def __init__(self, object: Any):
|
|
154
|
+
def __init__(self, object: Any) -> None:
|
|
153
155
|
super().__init__(object, BLOB)
|
|
154
156
|
|
|
155
157
|
|
|
@@ -157,52 +159,52 @@ class BlobValue(Value):
|
|
|
157
159
|
|
|
158
160
|
|
|
159
161
|
class DateValue(Value):
|
|
160
|
-
def __init__(self, object: Any):
|
|
162
|
+
def __init__(self, object: Any) -> None:
|
|
161
163
|
super().__init__(object, DATE)
|
|
162
164
|
|
|
163
165
|
|
|
164
166
|
class IntervalValue(Value):
|
|
165
|
-
def __init__(self, object: Any):
|
|
167
|
+
def __init__(self, object: Any) -> None:
|
|
166
168
|
super().__init__(object, INTERVAL)
|
|
167
169
|
|
|
168
170
|
|
|
169
171
|
class TimestampValue(Value):
|
|
170
|
-
def __init__(self, object: Any):
|
|
172
|
+
def __init__(self, object: Any) -> None:
|
|
171
173
|
super().__init__(object, TIMESTAMP)
|
|
172
174
|
|
|
173
175
|
|
|
174
176
|
class TimestampSecondValue(Value):
|
|
175
|
-
def __init__(self, object: Any):
|
|
177
|
+
def __init__(self, object: Any) -> None:
|
|
176
178
|
super().__init__(object, TIMESTAMP_S)
|
|
177
179
|
|
|
178
180
|
|
|
179
181
|
class TimestampMilisecondValue(Value):
|
|
180
|
-
def __init__(self, object: Any):
|
|
182
|
+
def __init__(self, object: Any) -> None:
|
|
181
183
|
super().__init__(object, TIMESTAMP_MS)
|
|
182
184
|
|
|
183
185
|
|
|
184
186
|
class TimestampNanosecondValue(Value):
|
|
185
|
-
def __init__(self, object: Any):
|
|
187
|
+
def __init__(self, object: Any) -> None:
|
|
186
188
|
super().__init__(object, TIMESTAMP_NS)
|
|
187
189
|
|
|
188
190
|
|
|
189
191
|
class TimestampTimeZoneValue(Value):
|
|
190
|
-
def __init__(self, object: Any):
|
|
192
|
+
def __init__(self, object: Any) -> None:
|
|
191
193
|
super().__init__(object, TIMESTAMP_TZ)
|
|
192
194
|
|
|
193
195
|
|
|
194
196
|
class TimeValue(Value):
|
|
195
|
-
def __init__(self, object: Any):
|
|
197
|
+
def __init__(self, object: Any) -> None:
|
|
196
198
|
super().__init__(object, TIME)
|
|
197
199
|
|
|
198
200
|
|
|
199
201
|
class TimeTimeZoneValue(Value):
|
|
200
|
-
def __init__(self, object: Any):
|
|
202
|
+
def __init__(self, object: Any) -> None:
|
|
201
203
|
super().__init__(object, TIME_TZ)
|
|
202
204
|
|
|
203
205
|
|
|
204
206
|
class ListValue(Value):
|
|
205
|
-
def __init__(self, object: Any, child_type: DuckDBPyType):
|
|
207
|
+
def __init__(self, object: Any, child_type: DuckDBPyType) -> None:
|
|
206
208
|
import duckdb
|
|
207
209
|
|
|
208
210
|
list_type = duckdb.list_type(child_type)
|
|
@@ -210,7 +212,7 @@ class ListValue(Value):
|
|
|
210
212
|
|
|
211
213
|
|
|
212
214
|
class StructValue(Value):
|
|
213
|
-
def __init__(self, object: Any, children:
|
|
215
|
+
def __init__(self, object: Any, children: dict[str, DuckDBPyType]) -> None:
|
|
214
216
|
import duckdb
|
|
215
217
|
|
|
216
218
|
struct_type = duckdb.struct_type(children)
|
|
@@ -218,7 +220,7 @@ class StructValue(Value):
|
|
|
218
220
|
|
|
219
221
|
|
|
220
222
|
class MapValue(Value):
|
|
221
|
-
def __init__(self, object: Any, key_type: DuckDBPyType, value_type: DuckDBPyType):
|
|
223
|
+
def __init__(self, object: Any, key_type: DuckDBPyType, value_type: DuckDBPyType) -> None:
|
|
222
224
|
import duckdb
|
|
223
225
|
|
|
224
226
|
map_type = duckdb.map_type(key_type, value_type)
|
|
@@ -226,43 +228,43 @@ class MapValue(Value):
|
|
|
226
228
|
|
|
227
229
|
|
|
228
230
|
class UnionType(Value):
|
|
229
|
-
def __init__(self, object: Any, members:
|
|
231
|
+
def __init__(self, object: Any, members: dict[str, DuckDBPyType]) -> None:
|
|
230
232
|
import duckdb
|
|
231
233
|
|
|
232
234
|
union_type = duckdb.union_type(members)
|
|
233
235
|
super().__init__(object, union_type)
|
|
234
236
|
|
|
235
237
|
|
|
236
|
-
# TODO: add EnumValue once `duckdb.enum_type` is added
|
|
238
|
+
# TODO: add EnumValue once `duckdb.enum_type` is added # noqa: TD002, TD003
|
|
237
239
|
|
|
238
240
|
__all__ = [
|
|
239
|
-
"Value",
|
|
240
|
-
"NullValue",
|
|
241
|
-
"BooleanValue",
|
|
242
|
-
"UnsignedBinaryValue",
|
|
243
|
-
"UnsignedShortValue",
|
|
244
|
-
"UnsignedIntegerValue",
|
|
245
|
-
"UnsignedLongValue",
|
|
246
241
|
"BinaryValue",
|
|
247
|
-
"ShortValue",
|
|
248
|
-
"IntegerValue",
|
|
249
|
-
"LongValue",
|
|
250
|
-
"HugeIntegerValue",
|
|
251
|
-
"UnsignedHugeIntegerValue",
|
|
252
|
-
"FloatValue",
|
|
253
|
-
"DoubleValue",
|
|
254
|
-
"DecimalValue",
|
|
255
|
-
"StringValue",
|
|
256
|
-
"UUIDValue",
|
|
257
242
|
"BitValue",
|
|
258
243
|
"BlobValue",
|
|
244
|
+
"BooleanValue",
|
|
259
245
|
"DateValue",
|
|
246
|
+
"DecimalValue",
|
|
247
|
+
"DoubleValue",
|
|
248
|
+
"FloatValue",
|
|
249
|
+
"HugeIntegerValue",
|
|
250
|
+
"IntegerValue",
|
|
260
251
|
"IntervalValue",
|
|
261
|
-
"
|
|
262
|
-
"
|
|
252
|
+
"LongValue",
|
|
253
|
+
"NullValue",
|
|
254
|
+
"ShortValue",
|
|
255
|
+
"StringValue",
|
|
256
|
+
"TimeTimeZoneValue",
|
|
257
|
+
"TimeValue",
|
|
263
258
|
"TimestampMilisecondValue",
|
|
264
259
|
"TimestampNanosecondValue",
|
|
260
|
+
"TimestampSecondValue",
|
|
265
261
|
"TimestampTimeZoneValue",
|
|
266
|
-
"
|
|
267
|
-
"
|
|
262
|
+
"TimestampValue",
|
|
263
|
+
"UUIDValue",
|
|
264
|
+
"UnsignedBinaryValue",
|
|
265
|
+
"UnsignedHugeIntegerValue",
|
|
266
|
+
"UnsignedIntegerValue",
|
|
267
|
+
"UnsignedLongValue",
|
|
268
|
+
"UnsignedShortValue",
|
|
269
|
+
"Value",
|
|
268
270
|
]
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: duckdb
|
|
3
|
+
Version: 1.4.2.dev1
|
|
4
|
+
Summary: DuckDB in-process database
|
|
5
|
+
Keywords: DuckDB,Database,SQL,OLAP
|
|
6
|
+
Author: DuckDB Foundation
|
|
7
|
+
Maintainer: DuckDB Foundation
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Topic :: Database
|
|
12
|
+
Classifier: Topic :: Database :: Database Engines/Servers
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Education
|
|
16
|
+
Classifier: Intended Audience :: Information Technology
|
|
17
|
+
Classifier: Intended Audience :: Science/Research
|
|
18
|
+
Classifier: Programming Language :: Python
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
26
|
+
Classifier: Programming Language :: C++
|
|
27
|
+
Project-URL: Documentation, https://duckdb.org/docs/stable/clients/python/overview
|
|
28
|
+
Project-URL: Source, https://github.com/duckdb/duckdb-python
|
|
29
|
+
Project-URL: Issues, https://github.com/duckdb/duckdb-python/issues
|
|
30
|
+
Project-URL: Changelog, https://github.com/duckdb/duckdb/releases
|
|
31
|
+
Requires-Python: >=3.9.0
|
|
32
|
+
Provides-Extra: all
|
|
33
|
+
Requires-Dist: ipython; extra == "all"
|
|
34
|
+
Requires-Dist: fsspec; extra == "all"
|
|
35
|
+
Requires-Dist: numpy; extra == "all"
|
|
36
|
+
Requires-Dist: pandas; extra == "all"
|
|
37
|
+
Requires-Dist: pyarrow; extra == "all"
|
|
38
|
+
Requires-Dist: adbc-driver-manager; extra == "all"
|
|
39
|
+
Description-Content-Type: text/markdown
|
|
40
|
+
|
|
41
|
+
<div align="center">
|
|
42
|
+
<picture>
|
|
43
|
+
<source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/duckdb/duckdb/refs/heads/main/logo/DuckDB_Logo-horizontal.svg">
|
|
44
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/duckdb/duckdb/refs/heads/main/logo/DuckDB_Logo-horizontal-dark-mode.svg">
|
|
45
|
+
<img alt="DuckDB logo" src="https://raw.githubusercontent.com/duckdb/duckdb/refs/heads/main/logo/DuckDB_Logo-horizontal.svg" height="100">
|
|
46
|
+
</picture>
|
|
47
|
+
</div>
|
|
48
|
+
<br />
|
|
49
|
+
<p align="center">
|
|
50
|
+
<a href="https://discord.gg/tcvwpjfnZx"><img src="https://shields.io/discord/909674491309850675" alt="Discord" /></a>
|
|
51
|
+
<a href="https://pypi.org/project/duckdb/"><img src="https://img.shields.io/pypi/v/duckdb.svg" alt="PyPI Latest Release"/></a>
|
|
52
|
+
</p>
|
|
53
|
+
<br />
|
|
54
|
+
<p align="center">
|
|
55
|
+
<a href="https://duckdb.org">DuckDB.org</a>
|
|
56
|
+
|
|
|
57
|
+
<a href="https://duckdb.org/docs/stable/guides/python/install">User Guide (Python)</a>
|
|
58
|
+
-
|
|
59
|
+
<a href="https://duckdb.org/docs/stable/clients/python/overview">API Docs (Python)</a>
|
|
60
|
+
</p>
|
|
61
|
+
|
|
62
|
+
# DuckDB: A Fast, In-Process, Portable, Open Source, Analytical Database System
|
|
63
|
+
|
|
64
|
+
* **Simple**: DuckDB is easy to install and deploy. It has zero external dependencies and runs in-process in its host application or as a single binary.
|
|
65
|
+
* **Portable**: DuckDB runs on Linux, macOS, Windows, Android, iOS and all popular hardware architectures. It has idiomatic client APIs for major programming languages.
|
|
66
|
+
* **Feature-rich**: DuckDB offers a rich SQL dialect. It can read and write file formats such as CSV, Parquet, and JSON, to and from the local file system and remote endpoints such as S3 buckets.
|
|
67
|
+
* **Fast**: DuckDB runs analytical queries at blazing speed thanks to its columnar engine, which supports parallel execution and can process larger-than-memory workloads.
|
|
68
|
+
* **Extensible**: DuckDB is extensible by third-party features such as new data types, functions, file formats and new SQL syntax. User contributions are available as community extensions.
|
|
69
|
+
* **Free**: DuckDB and its core extensions are open-source under the permissive MIT License. The intellectual property of the project is held by the DuckDB Foundation.
|
|
70
|
+
|
|
71
|
+
## Installation
|
|
72
|
+
|
|
73
|
+
Install the latest release of DuckDB directly from [PyPI](https://pypi.org/project/duckdb/):
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
pip install duckdb
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Install with all optional dependencies:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
pip install 'duckdb[all]'
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Development
|
|
86
|
+
|
|
87
|
+
Start by <a href="https://github.com/duckdb/duckdb-python/fork"><svg height="16" viewBox="0 0 16 16" version="1.1" width="16">
|
|
88
|
+
<path fill-rule="evenodd" d="M5 3.25a.75.75 0 11-1.5 0 .75.75 0 011.5 0zm0 2.122a2.25 2.25 0 10-1.5 0v.878A2.25 2.25 0 005.75 8.5h1.5v2.128a2.251 2.251 0 101.5 0V8.5h1.5a2.25 2.25 0 002.25-2.25v-.878a2.25 2.25 0 10-1.5 0v.878a.75.75 0 01-.75.75h-4.5A.75.75 0 015 6.25v-.878z"></path>
|
|
89
|
+
</svg>forking duckdb-python</a>.
|
|
90
|
+
|
|
91
|
+
### Cloning
|
|
92
|
+
|
|
93
|
+
After forking the duckdb-python repo we recommend you clone your fork as follows:
|
|
94
|
+
```shell
|
|
95
|
+
git clone --recurse-submodules $REPO_URL
|
|
96
|
+
git remote add upstream https://github.com/duckdb/duckdb-python.git
|
|
97
|
+
git fetch --all
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
... or, if you have already cloned your fork:
|
|
101
|
+
```shell
|
|
102
|
+
git submodule update --init --recursive
|
|
103
|
+
git remote add upstream https://github.com/duckdb/duckdb-python.git
|
|
104
|
+
git fetch --all
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Submodule update hook
|
|
108
|
+
|
|
109
|
+
If you'll be switching between branches that have the submodule set to different refs, then make your life
|
|
110
|
+
easier and add the git hooks in the .githooks directory to your git hooks:
|
|
111
|
+
```shell
|
|
112
|
+
cp .githooks/post-checkout .git/hooks/
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
### Editable installs (general)
|
|
117
|
+
|
|
118
|
+
It's good to be aware of the following when performing an editable install:
|
|
119
|
+
- `uv sync` or `uv run [tool]` perform an editable install by default. We have
|
|
120
|
+
configured the project so that scikit-build-core will use a persistent build-dir, but since the build itself
|
|
121
|
+
happens in an isolated, ephemeral environment, cmake's paths will point to non-existing directories. CMake itself
|
|
122
|
+
will be missing.
|
|
123
|
+
- You should install all development dependencies, and then build the project without build isolation, in two separate
|
|
124
|
+
steps. After this you can happily keep building and running, as long as you don't forget to pass in the
|
|
125
|
+
`--no-build-isolation` flag.
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
# install all dev dependencies without building the project (needed once)
|
|
129
|
+
uv sync -p 3.11 --no-install-project
|
|
130
|
+
# build and install without build isolation
|
|
131
|
+
uv sync --no-build-isolation
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Editable installs (IDEs)
|
|
135
|
+
|
|
136
|
+
If you're using an IDE then life is a little simpler. You install build dependencies and the project in the two
|
|
137
|
+
steps outlined above, and from that point on you can rely on e.g. CLion's cmake capabilities to do incremental
|
|
138
|
+
compilation and editable rebuilds. This will skip scikit-build-core's build backend and all of uv's dependency
|
|
139
|
+
management, so for "real" builds you better revert to the CLI. However, this should work fine for coding and debugging.
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
### Cleaning
|
|
143
|
+
|
|
144
|
+
```shell
|
|
145
|
+
uv cache clean
|
|
146
|
+
rm -rf build .venv uv.lock
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
### Building wheels and sdists
|
|
151
|
+
|
|
152
|
+
To build a wheel and sdist for your system and the default Python version:
|
|
153
|
+
```bash
|
|
154
|
+
uv build
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
To build a wheel for a different Python version:
|
|
158
|
+
```bash
|
|
159
|
+
# E.g. for Python 3.9
|
|
160
|
+
uv build -p 3.9
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### Running tests
|
|
164
|
+
|
|
165
|
+
Run all pytests:
|
|
166
|
+
```bash
|
|
167
|
+
uv run --no-build-isolation pytest ./tests --verbose
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Exclude the tests/slow directory:
|
|
171
|
+
```bash
|
|
172
|
+
uv run --no-build-isolation pytest ./tests --verbose --ignore=./tests/slow
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Test coverage
|
|
176
|
+
|
|
177
|
+
Run with coverage (during development you probably want to specify which tests to run):
|
|
178
|
+
```bash
|
|
179
|
+
COVERAGE=1 uv run --no-build-isolation coverage run -m pytest ./tests --verbose
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
The `COVERAGE` env var will compile the extension with `--coverage`, allowing us to collect coverage stats of C++
|
|
183
|
+
code as well as Python code.
|
|
184
|
+
|
|
185
|
+
Check coverage for Python code:
|
|
186
|
+
```bash
|
|
187
|
+
uvx coverage html -d htmlcov-python
|
|
188
|
+
uvx coverage report --format=markdown
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
Check coverage for C++ code (note: this will clutter your project dir with html files, consider saving them in some
|
|
192
|
+
other place):
|
|
193
|
+
```bash
|
|
194
|
+
uvx gcovr \
|
|
195
|
+
--gcov-ignore-errors all \
|
|
196
|
+
--root "$PWD" \
|
|
197
|
+
--filter "${PWD}/src/duckdb_py" \
|
|
198
|
+
--exclude '.*/\.cache/.*' \
|
|
199
|
+
--gcov-exclude '.*/\.cache/.*' \
|
|
200
|
+
--gcov-exclude '.*/external/.*' \
|
|
201
|
+
--gcov-exclude '.*/site-packages/.*' \
|
|
202
|
+
--exclude-unreachable-branches \
|
|
203
|
+
--exclude-throw-branches \
|
|
204
|
+
--html --html-details -o coverage-cpp.html \
|
|
205
|
+
build/coverage/src/duckdb_py \
|
|
206
|
+
--print-summary
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
### Typechecking and linting
|
|
210
|
+
|
|
211
|
+
- We're not running any mypy typechecking tests at the moment
|
|
212
|
+
- We're not running any Ruff / linting / formatting at the moment
|
|
213
|
+
|
|
214
|
+
### Cibuildwheel
|
|
215
|
+
|
|
216
|
+
You can run cibuildwheel locally for Linux. E.g. limited to Python 3.9:
|
|
217
|
+
```bash
|
|
218
|
+
CIBW_BUILD='cp39-*' uvx cibuildwheel --platform linux .
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
### Code conventions
|
|
222
|
+
|
|
223
|
+
* Follow the [Google Python styleguide](https://google.github.io/styleguide/pyguide.html)
|
|
224
|
+
* See the section on [Comments and Docstrings](https://google.github.io/styleguide/pyguide.html#s3.8-comments-and-docstrings)
|
|
225
|
+
|
|
226
|
+
### Tooling
|
|
227
|
+
|
|
228
|
+
This codebase is developed with the following tools:
|
|
229
|
+
- [Astral uv](https://docs.astral.sh/uv/) - for dependency management across all platforms we provide wheels for,
|
|
230
|
+
and for Python environment management. It will be hard to work on this codebase without having UV installed.
|
|
231
|
+
- [Scikit-build-core](https://scikit-build-core.readthedocs.io/en/latest/index.html) - the build backend for
|
|
232
|
+
building the extension. In the background, scikit-build-core uses cmake and ninja for compilation.
|
|
233
|
+
- [pybind11](https://pybind11.readthedocs.io/en/stable/index.html) - a bridge between C++ and Python.
|
|
234
|
+
- [CMake](https://cmake.org/) - the build system for both DuckDB itself and the DuckDB Python module.
|
|
235
|
+
- Cibuildwheel
|
|
236
|
+
|
|
237
|
+
### Merging changes to pythonpkg from duckdb main
|
|
238
|
+
|
|
239
|
+
1. Checkout main
|
|
240
|
+
2. Identify the merge commits that brought in tags to main:
|
|
241
|
+
```bash
|
|
242
|
+
git log --graph --oneline --decorate main --simplify-by-decoration
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
3. Get the log of commits
|
|
246
|
+
```bash
|
|
247
|
+
git log --oneline 71c5c07cdd..c9254ecff2 -- tools/pythonpkg/
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
4. Checkout v1.3-ossivalis
|
|
251
|
+
5. Get the log of commits
|
|
252
|
+
```bash
|
|
253
|
+
git log --oneline v1.3.0..v1.3.1 -- tools/pythonpkg/
|
|
254
|
+
```
|
|
255
|
+
git diff --name-status 71c5c07cdd c9254ecff2 -- tools/pythonpkg/
|
|
256
|
+
|
|
257
|
+
```bash
|
|
258
|
+
git log --oneline 71c5c07cdd..c9254ecff2 -- tools/pythonpkg/
|
|
259
|
+
git diff --name-status <HASH_A> <HASH_B> -- tools/pythonpkg/
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
## Versioning and Releases
|
|
264
|
+
|
|
265
|
+
The DuckDB Python package versioning and release scheme follows that of DuckDB itself. This means that a
|
|
266
|
+
`X.Y.Z[.postN]` release of the Python package ships the DuckDB stable release `X.Y.Z`. The optional `.postN` releases ship the same stable release of DuckDB as their predecessors plus Python package-specific fixes and / or features.
|
|
267
|
+
|
|
268
|
+
| Types | DuckDB Version | Resulting Python Extension Version |
|
|
269
|
+
|------------------------------------------------------------------------|----------------|------------------------------------|
|
|
270
|
+
| Stable release: DuckDB stable release | `1.3.1` | `1.3.1` |
|
|
271
|
+
| Stable post release: DuckDB stable release + Python fixes and features | `1.3.1` | `1.3.1.postX` |
|
|
272
|
+
| Nightly micro: DuckDB next micro nightly + Python next micro nightly | `1.3.2.devM` | `1.3.2.devN` |
|
|
273
|
+
| Nightly minor: DuckDB next minor nightly + Python next minor nightly | `1.4.0.devM` | `1.4.0.devN` |
|
|
274
|
+
|
|
275
|
+
Note that we do not ship nightly post releases (e.g. we don't ship `1.3.1.post2.dev3`).
|
|
276
|
+
|
|
277
|
+
### Branch and Tag Strategy
|
|
278
|
+
|
|
279
|
+
We cut releases as follows:
|
|
280
|
+
|
|
281
|
+
| Type | Tag | How |
|
|
282
|
+
|----------------------|--------------|---------------------------------------------------------------------------------|
|
|
283
|
+
| Stable minor release | vX.Y.0 | Adding a tag on `main` |
|
|
284
|
+
| Stable micro release | vX.Y.Z | Adding a tag on a minor release branch (e.g. `v1.3-ossivalis`) |
|
|
285
|
+
| Stable post release | vX.Y.Z-postN | Adding a tag on a post release branch (e.g. `v1.3.1-post`) |
|
|
286
|
+
| Nightly micro | _not tagged_ | Combining HEAD of the _micro_ release branches of DuckDB and the Python package |
|
|
287
|
+
| Nightly minor | _not tagged_ | Combining HEAD of the _minor_ release branches of DuckDB and the Python package |
|
|
288
|
+
|
|
289
|
+
### Release Runbooks
|
|
290
|
+
|
|
291
|
+
We cut a new **stable minor release** with the following steps:
|
|
292
|
+
1. Create a PR on `main` to pin the DuckDB submodule to the tag of its current release.
|
|
293
|
+
1. Iff all tests pass in CI, merge the PR.
|
|
294
|
+
1. Manually start the release workflow with the hash of this commit, and the tag name.
|
|
295
|
+
1. Iff all goes well, create a new PR to let the submodule track DuckDB main.
|
|
296
|
+
|
|
297
|
+
We cut a new **stable micro release** with the following steps:
|
|
298
|
+
1. Create a PR on the minor release branch to pin the DuckDB submodule to the tag of its current release.
|
|
299
|
+
1. Iff all tests pass in CI, merge the PR.
|
|
300
|
+
1. Manually start the release workflow with the hash of this commit, and the tag name.
|
|
301
|
+
1. Iff all goes well, create a new PR to let the submodule track DuckDB's minor release branch.
|
|
302
|
+
|
|
303
|
+
We cut a new **stable post release** with the following steps:
|
|
304
|
+
1. Create a PR on the post release branch to pin the DuckDB submodule to the tag of its current release.
|
|
305
|
+
1. Iff all tests pass in CI, merge the PR.
|
|
306
|
+
1. Manually start the release workflow with the hash of this commit, and the tag name.
|
|
307
|
+
1. Iff all goes well, create a new PR to let the submodule track DuckDB's minor release branch.
|
|
308
|
+
|
|
309
|
+
### Dynamic Versioning Integration
|
|
310
|
+
|
|
311
|
+
The package uses `setuptools_scm` with `scikit-build-core` for automatic version determination, and implements a custom
|
|
312
|
+
versioning scheme.
|
|
313
|
+
|
|
314
|
+
- **pyproject.toml configuration**:
|
|
315
|
+
```toml
|
|
316
|
+
[tool.scikit-build]
|
|
317
|
+
metadata.version.provider = "scikit_build_core.metadata.setuptools_scm"
|
|
318
|
+
|
|
319
|
+
[tool.setuptools_scm]
|
|
320
|
+
version_scheme = "duckdb_packaging._setuptools_scm_version:version_scheme"
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
- **Environment variables**:
|
|
324
|
+
- `MAIN_BRANCH_VERSIONING=0`: Use release branch versioning (patch increments)
|
|
325
|
+
- `MAIN_BRANCH_VERSIONING=1`: Use main branch versioning (minor increments)
|
|
326
|
+
- `OVERRIDE_GIT_DESCRIBE`: Override version detection
|