duckdb-1.5.0.dev53-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of duckdb might be problematic.
- _duckdb-stubs/__init__.pyi +1443 -0
- _duckdb-stubs/_func.pyi +46 -0
- _duckdb-stubs/_sqltypes.pyi +75 -0
- _duckdb.cpython-314-x86_64-linux-gnu.so +0 -0
- adbc_driver_duckdb/__init__.py +50 -0
- adbc_driver_duckdb/dbapi.py +115 -0
- duckdb/__init__.py +381 -0
- duckdb/_dbapi_type_object.py +231 -0
- duckdb/_version.py +22 -0
- duckdb/bytes_io_wrapper.py +69 -0
- duckdb/experimental/__init__.py +3 -0
- duckdb/experimental/spark/LICENSE +260 -0
- duckdb/experimental/spark/__init__.py +6 -0
- duckdb/experimental/spark/_globals.py +77 -0
- duckdb/experimental/spark/_typing.py +46 -0
- duckdb/experimental/spark/conf.py +46 -0
- duckdb/experimental/spark/context.py +180 -0
- duckdb/experimental/spark/errors/__init__.py +70 -0
- duckdb/experimental/spark/errors/error_classes.py +918 -0
- duckdb/experimental/spark/errors/exceptions/__init__.py +16 -0
- duckdb/experimental/spark/errors/exceptions/base.py +168 -0
- duckdb/experimental/spark/errors/utils.py +111 -0
- duckdb/experimental/spark/exception.py +18 -0
- duckdb/experimental/spark/sql/__init__.py +7 -0
- duckdb/experimental/spark/sql/_typing.py +86 -0
- duckdb/experimental/spark/sql/catalog.py +79 -0
- duckdb/experimental/spark/sql/column.py +361 -0
- duckdb/experimental/spark/sql/conf.py +24 -0
- duckdb/experimental/spark/sql/dataframe.py +1389 -0
- duckdb/experimental/spark/sql/functions.py +6195 -0
- duckdb/experimental/spark/sql/group.py +424 -0
- duckdb/experimental/spark/sql/readwriter.py +435 -0
- duckdb/experimental/spark/sql/session.py +297 -0
- duckdb/experimental/spark/sql/streaming.py +36 -0
- duckdb/experimental/spark/sql/type_utils.py +107 -0
- duckdb/experimental/spark/sql/types.py +1239 -0
- duckdb/experimental/spark/sql/udf.py +37 -0
- duckdb/filesystem.py +33 -0
- duckdb/func/__init__.py +3 -0
- duckdb/functional/__init__.py +13 -0
- duckdb/polars_io.py +284 -0
- duckdb/py.typed +0 -0
- duckdb/query_graph/__main__.py +358 -0
- duckdb/sqltypes/__init__.py +63 -0
- duckdb/typing/__init__.py +71 -0
- duckdb/udf.py +24 -0
- duckdb/value/__init__.py +1 -0
- duckdb/value/constant/__init__.py +270 -0
- duckdb-1.5.0.dev53.dist-info/METADATA +87 -0
- duckdb-1.5.0.dev53.dist-info/RECORD +52 -0
- duckdb-1.5.0.dev53.dist-info/WHEEL +6 -0
- duckdb-1.5.0.dev53.dist-info/licenses/LICENSE +7 -0
--- /dev/null
+++ duckdb/experimental/spark/sql/column.py
@@ -0,0 +1,361 @@
+from collections.abc import Iterable  # noqa: D100
+from typing import TYPE_CHECKING, Any, Callable, Union, cast
+
+from ..exception import ContributionsAcceptedError
+from .types import DataType
+
+if TYPE_CHECKING:
+    from ._typing import DateTimeLiteral, DecimalLiteral, LiteralType
+
+from duckdb import ColumnExpression, ConstantExpression, Expression, FunctionExpression
+from duckdb.sqltypes import DuckDBPyType
+
+__all__ = ["Column"]
+
+
+def _get_expr(x: Union["Column", str]) -> Expression:
+    return x.expr if isinstance(x, Column) else ConstantExpression(x)
+
+
+def _func_op(name: str, doc: str = "") -> Callable[["Column"], "Column"]:
+    def _(self: "Column") -> "Column":
+        njc = getattr(self.expr, name)()
+        return Column(njc)
+
+    _.__doc__ = doc
+    return _
+
+
+def _unary_op(
+    name: str,
+    doc: str = "unary operator",
+) -> Callable[["Column"], "Column"]:
+    """Create a method for given unary operator."""
+
+    def _(self: "Column") -> "Column":
+        # Call the function identified by 'name' on the internal Expression object
+        expr = getattr(self.expr, name)()
+        return Column(expr)
+
+    _.__doc__ = doc
+    return _
+
+
+def _bin_op(
+    name: str,
+    doc: str = "binary operator",
+) -> Callable[["Column", Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]], "Column"]:
+    """Create a method for given binary operator."""
+
+    def _(
+        self: "Column",
+        other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"],
+    ) -> "Column":
+        jc = _get_expr(other)
+        njc = getattr(self.expr, name)(jc)
+        return Column(njc)
+
+    _.__doc__ = doc
+    return _
+
+
+def _bin_func(
+    name: str,
+    doc: str = "binary function",
+) -> Callable[["Column", Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]], "Column"]:
+    """Create a function expression for the given binary function."""
+
+    def _(
+        self: "Column",
+        other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"],
+    ) -> "Column":
+        other = _get_expr(other)
+        func = FunctionExpression(name, self.expr, other)
+        return Column(func)
+
+    _.__doc__ = doc
+    return _
+
+
+class Column:
+    """A column in a DataFrame.
+
+    :class:`Column` instances can be created by::
+
+        # 1. Select a column out of a DataFrame
+
+        df.colName
+        df["colName"]
+
+        # 2. Create from an expression
+        df.colName + 1
+        1 / df.colName
+
+    .. versionadded:: 1.3.0
+    """
+
+    def __init__(self, expr: Expression) -> None:  # noqa: D107
+        self.expr = expr
+
+    # arithmetic operators
+    def __neg__(self) -> "Column":  # noqa: D105
+        return Column(-self.expr)
+
+    # `and`, `or`, `not` cannot be overloaded in Python,
+    # so use bitwise operators as boolean operators
+    __and__ = _bin_op("__and__")
+    __or__ = _bin_op("__or__")
+    __invert__ = _func_op("__invert__")
+    __rand__ = _bin_op("__rand__")
+    __ror__ = _bin_op("__ror__")
+
+    __add__ = _bin_op("__add__")
+
+    __sub__ = _bin_op("__sub__")
+
+    __mul__ = _bin_op("__mul__")
+
+    __div__ = _bin_op("__div__")
+
+    __truediv__ = _bin_op("__truediv__")
+
+    __mod__ = _bin_op("__mod__")
+
+    __pow__ = _bin_op("__pow__")
+
+    __radd__ = _bin_op("__radd__")
+
+    __rsub__ = _bin_op("__rsub__")
+
+    __rmul__ = _bin_op("__rmul__")
+
+    __rdiv__ = _bin_op("__rdiv__")
+
+    __rtruediv__ = _bin_op("__rtruediv__")
+
+    __rmod__ = _bin_op("__rmod__")
+
+    __rpow__ = _bin_op("__rpow__")
+
+    def __getitem__(self, k: Any) -> "Column":  # noqa: ANN401
+        """An expression that gets an item at position ``ordinal`` out of a list,
+        or gets an item by key out of a dict.
+
+        .. versionadded:: 1.3.0
+
+        .. versionchanged:: 3.4.0
+            Supports Spark Connect.
+
+        Parameters
+        ----------
+        k
+            a literal value, or a slice object without step.
+
+        Returns:
+        -------
+        :class:`Column`
+            Column representing the item got by key out of a dict, or substrings sliced by
+            the given slice object.
+
+        Examples:
+        --------
+        >>> df = spark.createDataFrame([("abcedfg", {"key": "value"})], ["l", "d"])
+        >>> df.select(df.l[slice(1, 3)], df.d["key"]).show()
+        +------------------+------+
+        |substring(l, 1, 3)|d[key]|
+        +------------------+------+
+        |               abc| value|
+        +------------------+------+
+        """  # noqa: D205
+        if isinstance(k, slice):
+            raise ContributionsAcceptedError
+            # if k.step is not None:
+            #     raise ValueError("Using a slice with a step value is not supported")
+            # return self.substr(k.start, k.stop)
+        else:
+            # TODO: this is super hacky  # noqa: TD002, TD003
+            expr_str = str(self.expr) + "." + str(k)
+            return Column(ColumnExpression(expr_str))
+
+    def __getattr__(self, item: Any) -> "Column":  # noqa: ANN401
+        """An expression that gets an item at position ``ordinal`` out of a list,
+        or gets an item by key out of a dict.
+
+        Parameters
+        ----------
+        item
+            a literal value.
+
+        Returns:
+        -------
+        :class:`Column`
+            Column representing the item got by key out of a dict.
+
+        Examples:
+        --------
+        >>> df = spark.createDataFrame([("abcedfg", {"key": "value"})], ["l", "d"])
+        >>> df.select(df.d.key).show()
+        +------+
+        |d[key]|
+        +------+
+        | value|
+        +------+
+        """  # noqa: D205
+        if item.startswith("__"):
+            msg = "Can not access __ (dunder) method"
+            raise AttributeError(msg)
+        return self[item]
+
+    def alias(self, alias: str) -> "Column":  # noqa: D102
+        return Column(self.expr.alias(alias))
+
+    def when(self, condition: "Column", value: Union["Column", str]) -> "Column":  # noqa: D102
+        if not isinstance(condition, Column):
+            msg = "condition should be a Column"
+            raise TypeError(msg)
+        v = _get_expr(value)
+        expr = self.expr.when(condition.expr, v)
+        return Column(expr)
+
+    def otherwise(self, value: Union["Column", str]) -> "Column":  # noqa: D102
+        v = _get_expr(value)
+        expr = self.expr.otherwise(v)
+        return Column(expr)
+
+    def cast(self, dataType: Union[DataType, str]) -> "Column":  # noqa: D102
+        internal_type = DuckDBPyType(dataType) if isinstance(dataType, str) else dataType.duckdb_type
+        return Column(self.expr.cast(internal_type))
+
+    def isin(self, *cols: Union[Iterable[Union["Column", str]], Union["Column", str]]) -> "Column":  # noqa: D102
+        if len(cols) == 1 and isinstance(cols[0], (list, set)):
+            # Only one argument supplied, it's a list
+            cols = cast("tuple", cols[0])
+
+        cols = cast(
+            "tuple",
+            [_get_expr(c) for c in cols],
+        )
+        return Column(self.expr.isin(*cols))
+
+    # logistic operators
+    def __eq__(  # type: ignore[override]
+        self,
+        other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"],
+    ) -> "Column":
+        """Binary function."""
+        return Column(self.expr == (_get_expr(other)))
+
+    def __ne__(  # type: ignore[override]
+        self,
+        other: object,
+    ) -> "Column":
+        """Binary function."""
+        return Column(self.expr != (_get_expr(other)))
+
+    __lt__ = _bin_op("__lt__")
+
+    __le__ = _bin_op("__le__")
+
+    __ge__ = _bin_op("__ge__")
+
+    __gt__ = _bin_op("__gt__")
+
+    # String interrogation methods
+
+    contains = _bin_func("contains")
+    rlike = _bin_func("regexp_matches")
+    like = _bin_func("~~")
+    ilike = _bin_func("~~*")
+    startswith = _bin_func("starts_with")
+    endswith = _bin_func("suffix")
+
+    # order
+    _asc_doc = """
+    Returns a sort expression based on the ascending order of the column.
+    Examples
+    --------
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"])
+    >>> df.select(df.name).orderBy(df.name.asc()).collect()
+    [Row(name='Alice'), Row(name='Tom')]
+    """
+
+    _asc_nulls_first_doc = """
+    Returns a sort expression based on ascending order of the column, and null values
+    return before non-null values.
+
+    Examples
+    --------
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+    >>> df.select(df.name).orderBy(df.name.asc_nulls_first()).collect()
+    [Row(name=None), Row(name='Alice'), Row(name='Tom')]
+
+    """
+    _asc_nulls_last_doc = """
+    Returns a sort expression based on ascending order of the column, and null values
+    appear after non-null values.
+
+    Examples
+    --------
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+    >>> df.select(df.name).orderBy(df.name.asc_nulls_last()).collect()
+    [Row(name='Alice'), Row(name='Tom'), Row(name=None)]
+
+    """
+    _desc_doc = """
+    Returns a sort expression based on the descending order of the column.
+    Examples
+    --------
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"])
+    >>> df.select(df.name).orderBy(df.name.desc()).collect()
+    [Row(name='Tom'), Row(name='Alice')]
+    """
+    _desc_nulls_first_doc = """
+    Returns a sort expression based on the descending order of the column, and null values
+    appear before non-null values.
+
+    Examples
+    --------
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+    >>> df.select(df.name).orderBy(df.name.desc_nulls_first()).collect()
+    [Row(name=None), Row(name='Tom'), Row(name='Alice')]
+
+    """
+    _desc_nulls_last_doc = """
+    Returns a sort expression based on the descending order of the column, and null values
+    appear after non-null values.
+
+    Examples
+    --------
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+    >>> df.select(df.name).orderBy(df.name.desc_nulls_last()).collect()
+    [Row(name='Tom'), Row(name='Alice'), Row(name=None)]
+    """
+
+    asc = _unary_op("asc", _asc_doc)
+    desc = _unary_op("desc", _desc_doc)
+    nulls_first = _unary_op("nulls_first")
+    nulls_last = _unary_op("nulls_last")
+
+    def asc_nulls_first(self) -> "Column":  # noqa: D102
+        return self.asc().nulls_first()
+
+    def asc_nulls_last(self) -> "Column":  # noqa: D102
+        return self.asc().nulls_last()
+
+    def desc_nulls_first(self) -> "Column":  # noqa: D102
+        return self.desc().nulls_first()
+
+    def desc_nulls_last(self) -> "Column":  # noqa: D102
+        return self.desc().nulls_last()
+
+    def isNull(self) -> "Column":  # noqa: D102
+        return Column(self.expr.isnull())
+
+    def isNotNull(self) -> "Column":  # noqa: D102
+        return Column(self.expr.isnotnull())
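The column.py hunk above is built around three small factories: _bin_op and _func_op generate dunder methods that forward to the wrapped DuckDB Expression, while _bin_func maps a method name onto a SQL function via FunctionExpression. The following sketch is illustrative only and is not part of the diff; the column names price, quantity and name are made up. It shows how the generated operators compose once this wheel is installed:

from duckdb import ColumnExpression
from duckdb.experimental.spark.sql.column import Column

# Wrap raw DuckDB expressions in the Spark-style Column class from the hunk above.
price = Column(ColumnExpression("price"))
quantity = Column(ColumnExpression("quantity"))
name = Column(ColumnExpression("name"))

total = (price * quantity).alias("total")   # __mul__ was generated by _bin_op("__mul__")
cheap = price < 10                          # __lt__ likewise forwards to the Expression
starts = name.startswith("duck")            # _bin_func builds FunctionExpression("starts_with", ...)
missing = price.isNull()                    # thin wrapper around Expression.isnull()

# Each operation returns a new Column whose .expr is a duckdb Expression.
for col in (total, cheap, starts, missing):
    print(col.expr)

Nothing is evaluated here; each step only builds an expression tree that a DataFrame would later hand to DuckDB.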
--- /dev/null
+++ duckdb/experimental/spark/sql/conf.py
@@ -0,0 +1,24 @@
+from typing import Optional, Union  # noqa: D100
+
+from duckdb import DuckDBPyConnection
+from duckdb.experimental.spark._globals import _NoValue, _NoValueType
+
+
+class RuntimeConfig:  # noqa: D101
+    def __init__(self, connection: DuckDBPyConnection) -> None:  # noqa: D107
+        self._connection = connection
+
+    def set(self, key: str, value: str) -> None:  # noqa: D102
+        raise NotImplementedError
+
+    def isModifiable(self, key: str) -> bool:  # noqa: D102
+        raise NotImplementedError
+
+    def unset(self, key: str) -> None:  # noqa: D102
+        raise NotImplementedError
+
+    def get(self, key: str, default: Union[Optional[str], _NoValueType] = _NoValue) -> str:  # noqa: D102
+        raise NotImplementedError
+
+
+__all__ = ["RuntimeConfig"]
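Every method of the RuntimeConfig stub above raises NotImplementedError. Purely as a hypothetical sketch, and not the shipped behaviour, the same interface could be backed by DuckDB's own settings machinery (SET, RESET and the duckdb_settings() table function); the NaiveRuntimeConfig class below is invented for illustration:

import duckdb


class NaiveRuntimeConfig:
    """Hypothetical stand-in for the stubbed RuntimeConfig, for illustration only."""

    def __init__(self, connection: duckdb.DuckDBPyConnection) -> None:
        self._connection = connection

    def set(self, key: str, value: str) -> None:
        # DuckDB options are changed with SET; a real implementation would
        # validate the option name and value instead of formatting them in.
        self._connection.execute(f"SET {key} = '{value}'")

    def unset(self, key: str) -> None:
        # RESET restores an option to its default value.
        self._connection.execute(f"RESET {key}")

    def get(self, key: str) -> str:
        # duckdb_settings() lists every option with its current value.
        row = self._connection.execute(
            "SELECT value FROM duckdb_settings() WHERE name = ?", [key]
        ).fetchone()
        return row[0] if row else None


conf = NaiveRuntimeConfig(duckdb.connect())
conf.set("memory_limit", "1GB")
print(conf.get("memory_limit"))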