duckdb-1.5.0.dev56-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of duckdb might be problematic.

Files changed (52)
  1. _duckdb-stubs/__init__.pyi +1443 -0
  2. _duckdb-stubs/_func.pyi +46 -0
  3. _duckdb-stubs/_sqltypes.pyi +75 -0
  4. _duckdb.cpython-314-x86_64-linux-gnu.so +0 -0
  5. adbc_driver_duckdb/__init__.py +50 -0
  6. adbc_driver_duckdb/dbapi.py +115 -0
  7. duckdb/__init__.py +381 -0
  8. duckdb/_dbapi_type_object.py +231 -0
  9. duckdb/_version.py +22 -0
  10. duckdb/bytes_io_wrapper.py +69 -0
  11. duckdb/experimental/__init__.py +3 -0
  12. duckdb/experimental/spark/LICENSE +260 -0
  13. duckdb/experimental/spark/__init__.py +6 -0
  14. duckdb/experimental/spark/_globals.py +77 -0
  15. duckdb/experimental/spark/_typing.py +46 -0
  16. duckdb/experimental/spark/conf.py +46 -0
  17. duckdb/experimental/spark/context.py +180 -0
  18. duckdb/experimental/spark/errors/__init__.py +70 -0
  19. duckdb/experimental/spark/errors/error_classes.py +918 -0
  20. duckdb/experimental/spark/errors/exceptions/__init__.py +16 -0
  21. duckdb/experimental/spark/errors/exceptions/base.py +168 -0
  22. duckdb/experimental/spark/errors/utils.py +111 -0
  23. duckdb/experimental/spark/exception.py +18 -0
  24. duckdb/experimental/spark/sql/__init__.py +7 -0
  25. duckdb/experimental/spark/sql/_typing.py +86 -0
  26. duckdb/experimental/spark/sql/catalog.py +79 -0
  27. duckdb/experimental/spark/sql/column.py +361 -0
  28. duckdb/experimental/spark/sql/conf.py +24 -0
  29. duckdb/experimental/spark/sql/dataframe.py +1389 -0
  30. duckdb/experimental/spark/sql/functions.py +6195 -0
  31. duckdb/experimental/spark/sql/group.py +424 -0
  32. duckdb/experimental/spark/sql/readwriter.py +435 -0
  33. duckdb/experimental/spark/sql/session.py +297 -0
  34. duckdb/experimental/spark/sql/streaming.py +36 -0
  35. duckdb/experimental/spark/sql/type_utils.py +107 -0
  36. duckdb/experimental/spark/sql/types.py +1239 -0
  37. duckdb/experimental/spark/sql/udf.py +37 -0
  38. duckdb/filesystem.py +33 -0
  39. duckdb/func/__init__.py +3 -0
  40. duckdb/functional/__init__.py +13 -0
  41. duckdb/polars_io.py +284 -0
  42. duckdb/py.typed +0 -0
  43. duckdb/query_graph/__main__.py +358 -0
  44. duckdb/sqltypes/__init__.py +63 -0
  45. duckdb/typing/__init__.py +71 -0
  46. duckdb/udf.py +24 -0
  47. duckdb/value/__init__.py +1 -0
  48. duckdb/value/constant/__init__.py +270 -0
  49. duckdb-1.5.0.dev56.dist-info/METADATA +87 -0
  50. duckdb-1.5.0.dev56.dist-info/RECORD +52 -0
  51. duckdb-1.5.0.dev56.dist-info/WHEEL +6 -0
  52. duckdb-1.5.0.dev56.dist-info/licenses/LICENSE +7 -0
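
For anyone evaluating this release, a minimal smoke test of the wheel uses only the long-standing stable duckdb API (duckdb.connect, sql, __version__) and touches none of the new experimental modules shown below; this is a sketch, not part of the package diff:

    # Minimal smoke test for the wheel (stable duckdb API only).
    import duckdb

    con = duckdb.connect()                 # in-memory database
    con.sql("SELECT 42 AS answer").show()  # prints a one-row result table
    print(duckdb.__version__)              # expected to report 1.5.0.dev56 for this build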
duckdb/experimental/spark/sql/column.py
@@ -0,0 +1,361 @@
+ from collections.abc import Iterable  # noqa: D100
+ from typing import TYPE_CHECKING, Any, Callable, Union, cast
+
+ from ..exception import ContributionsAcceptedError
+ from .types import DataType
+
+ if TYPE_CHECKING:
+     from ._typing import DateTimeLiteral, DecimalLiteral, LiteralType
+
+ from duckdb import ColumnExpression, ConstantExpression, Expression, FunctionExpression
+ from duckdb.sqltypes import DuckDBPyType
+
+ __all__ = ["Column"]
+
+
+ def _get_expr(x: Union["Column", str]) -> Expression:
+     return x.expr if isinstance(x, Column) else ConstantExpression(x)
+
+
+ def _func_op(name: str, doc: str = "") -> Callable[["Column"], "Column"]:
+     def _(self: "Column") -> "Column":
+         njc = getattr(self.expr, name)()
+         return Column(njc)
+
+     _.__doc__ = doc
+     return _
+
+
+ def _unary_op(
+     name: str,
+     doc: str = "unary operator",
+ ) -> Callable[["Column"], "Column"]:
+     """Create a method for given unary operator."""
+
+     def _(self: "Column") -> "Column":
+         # Call the function identified by 'name' on the internal Expression object
+         expr = getattr(self.expr, name)()
+         return Column(expr)
+
+     _.__doc__ = doc
+     return _
+
+
+ def _bin_op(
+     name: str,
+     doc: str = "binary operator",
+ ) -> Callable[["Column", Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]], "Column"]:
+     """Create a method for given binary operator."""
+
+     def _(
+         self: "Column",
+         other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"],
+     ) -> "Column":
+         jc = _get_expr(other)
+         njc = getattr(self.expr, name)(jc)
+         return Column(njc)
+
+     _.__doc__ = doc
+     return _
+
+
+ def _bin_func(
+     name: str,
+     doc: str = "binary function",
+ ) -> Callable[["Column", Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]], "Column"]:
+     """Create a function expression for the given binary function."""
+
+     def _(
+         self: "Column",
+         other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"],
+     ) -> "Column":
+         other = _get_expr(other)
+         func = FunctionExpression(name, self.expr, other)
+         return Column(func)
+
+     _.__doc__ = doc
+     return _
+
+
+ class Column:
+     """A column in a DataFrame.
+
+     :class:`Column` instances can be created by::
+
+         # 1. Select a column out of a DataFrame
+
+         df.colName
+         df["colName"]
+
+         # 2. Create from an expression
+         df.colName + 1
+         1 / df.colName
+
+     .. versionadded:: 1.3.0
+     """
+
+     def __init__(self, expr: Expression) -> None:  # noqa: D107
+         self.expr = expr
+
+     # arithmetic operators
+     def __neg__(self) -> "Column":  # noqa: D105
+         return Column(-self.expr)
+
+     # `and`, `or`, `not` cannot be overloaded in Python,
+     # so use bitwise operators as boolean operators
+     __and__ = _bin_op("__and__")
+     __or__ = _bin_op("__or__")
+     __invert__ = _func_op("__invert__")
+     __rand__ = _bin_op("__rand__")
+     __ror__ = _bin_op("__ror__")
+
+     __add__ = _bin_op("__add__")
+
+     __sub__ = _bin_op("__sub__")
+
+     __mul__ = _bin_op("__mul__")
+
+     __div__ = _bin_op("__div__")
+
+     __truediv__ = _bin_op("__truediv__")
+
+     __mod__ = _bin_op("__mod__")
+
+     __pow__ = _bin_op("__pow__")
+
+     __radd__ = _bin_op("__radd__")
+
+     __rsub__ = _bin_op("__rsub__")
+
+     __rmul__ = _bin_op("__rmul__")
+
+     __rdiv__ = _bin_op("__rdiv__")
+
+     __rtruediv__ = _bin_op("__rtruediv__")
+
+     __rmod__ = _bin_op("__rmod__")
+
+     __rpow__ = _bin_op("__rpow__")
+
+     def __getitem__(self, k: Any) -> "Column":  # noqa: ANN401
+         """An expression that gets an item at position ``ordinal`` out of a list,
+         or gets an item by key out of a dict.
+
+         .. versionadded:: 1.3.0
+
+         .. versionchanged:: 3.4.0
+             Supports Spark Connect.
+
+         Parameters
+         ----------
+         k
+             a literal value, or a slice object without step.
+
+         Returns:
+         -------
+         :class:`Column`
+             Column representing the item got by key out of a dict, or substrings sliced by
+             the given slice object.
+
+         Examples:
+         --------
+         >>> df = spark.createDataFrame([("abcedfg", {"key": "value"})], ["l", "d"])
+         >>> df.select(df.l[slice(1, 3)], df.d["key"]).show()
+         +------------------+------+
+         |substring(l, 1, 3)|d[key]|
+         +------------------+------+
+         |               abc| value|
+         +------------------+------+
+         """  # noqa: D205
+         if isinstance(k, slice):
+             raise ContributionsAcceptedError
+             # if k.step is not None:
+             #     raise ValueError("Using a slice with a step value is not supported")
+             # return self.substr(k.start, k.stop)
+         else:
+             # TODO: this is super hacky  # noqa: TD002, TD003
+             expr_str = str(self.expr) + "." + str(k)
+             return Column(ColumnExpression(expr_str))
+
+     def __getattr__(self, item: Any) -> "Column":  # noqa: ANN401
+         """An expression that gets an item at position ``ordinal`` out of a list,
+         or gets an item by key out of a dict.
+
+         Parameters
+         ----------
+         item
+             a literal value.
+
+         Returns:
+         -------
+         :class:`Column`
+             Column representing the item got by key out of a dict.
+
+         Examples:
+         --------
+         >>> df = spark.createDataFrame([("abcedfg", {"key": "value"})], ["l", "d"])
+         >>> df.select(df.d.key).show()
+         +------+
+         |d[key]|
+         +------+
+         | value|
+         +------+
+         """  # noqa: D205
+         if item.startswith("__"):
+             msg = "Can not access __ (dunder) method"
+             raise AttributeError(msg)
+         return self[item]
+
+     def alias(self, alias: str) -> "Column":  # noqa: D102
+         return Column(self.expr.alias(alias))
+
+     def when(self, condition: "Column", value: Union["Column", str]) -> "Column":  # noqa: D102
+         if not isinstance(condition, Column):
+             msg = "condition should be a Column"
+             raise TypeError(msg)
+         v = _get_expr(value)
+         expr = self.expr.when(condition.expr, v)
+         return Column(expr)
+
+     def otherwise(self, value: Union["Column", str]) -> "Column":  # noqa: D102
+         v = _get_expr(value)
+         expr = self.expr.otherwise(v)
+         return Column(expr)
+
+     def cast(self, dataType: Union[DataType, str]) -> "Column":  # noqa: D102
+         internal_type = DuckDBPyType(dataType) if isinstance(dataType, str) else dataType.duckdb_type
+         return Column(self.expr.cast(internal_type))
+
+     def isin(self, *cols: Union[Iterable[Union["Column", str]], Union["Column", str]]) -> "Column":  # noqa: D102
+         if len(cols) == 1 and isinstance(cols[0], (list, set)):
+             # Only one argument supplied, it's a list
+             cols = cast("tuple", cols[0])
+
+         cols = cast(
+             "tuple",
+             [_get_expr(c) for c in cols],
+         )
+         return Column(self.expr.isin(*cols))
+
+     # comparison operators
+     def __eq__(  # type: ignore[override]
+         self,
+         other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"],
+     ) -> "Column":
+         """Binary function."""
+         return Column(self.expr == (_get_expr(other)))
+
+     def __ne__(  # type: ignore[override]
+         self,
+         other: object,
+     ) -> "Column":
+         """Binary function."""
+         return Column(self.expr != (_get_expr(other)))
+
+     __lt__ = _bin_op("__lt__")
+
+     __le__ = _bin_op("__le__")
+
+     __ge__ = _bin_op("__ge__")
+
+     __gt__ = _bin_op("__gt__")
+
+     # String interrogation methods
+
+     contains = _bin_func("contains")
+     rlike = _bin_func("regexp_matches")
+     like = _bin_func("~~")
+     ilike = _bin_func("~~*")
+     startswith = _bin_func("starts_with")
+     endswith = _bin_func("suffix")
+
+     # order
+     _asc_doc = """
+     Returns a sort expression based on the ascending order of the column.
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.asc()).collect()
+     [Row(name='Alice'), Row(name='Tom')]
+     """
+
+     _asc_nulls_first_doc = """
+     Returns a sort expression based on ascending order of the column, and null values
+     appear before non-null values.
+
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.asc_nulls_first()).collect()
+     [Row(name=None), Row(name='Alice'), Row(name='Tom')]
+
+     """
+     _asc_nulls_last_doc = """
+     Returns a sort expression based on ascending order of the column, and null values
+     appear after non-null values.
+
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.asc_nulls_last()).collect()
+     [Row(name='Alice'), Row(name='Tom'), Row(name=None)]
+
+     """
+     _desc_doc = """
+     Returns a sort expression based on the descending order of the column.
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.desc()).collect()
+     [Row(name='Tom'), Row(name='Alice')]
+     """
+     _desc_nulls_first_doc = """
+     Returns a sort expression based on the descending order of the column, and null values
+     appear before non-null values.
+
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.desc_nulls_first()).collect()
+     [Row(name=None), Row(name='Tom'), Row(name='Alice')]
+
+     """
+     _desc_nulls_last_doc = """
+     Returns a sort expression based on the descending order of the column, and null values
+     appear after non-null values.
+
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.desc_nulls_last()).collect()
+     [Row(name='Tom'), Row(name='Alice'), Row(name=None)]
+     """
+
+     asc = _unary_op("asc", _asc_doc)
+     desc = _unary_op("desc", _desc_doc)
+     nulls_first = _unary_op("nulls_first")
+     nulls_last = _unary_op("nulls_last")
+
+     def asc_nulls_first(self) -> "Column":  # noqa: D102
+         return self.asc().nulls_first()
+
+     def asc_nulls_last(self) -> "Column":  # noqa: D102
+         return self.asc().nulls_last()
+
+     def desc_nulls_first(self) -> "Column":  # noqa: D102
+         return self.desc().nulls_first()
+
+     def desc_nulls_last(self) -> "Column":  # noqa: D102
+         return self.desc().nulls_last()
+
+     def isNull(self) -> "Column":  # noqa: D102
+         return Column(self.expr.isnull())
+
+     def isNotNull(self) -> "Column":  # noqa: D102
+         return Column(self.expr.isnotnull())
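
The Column class above is normally reached through the experimental Spark-compatible session rather than constructed directly. A minimal usage sketch follows; it assumes the SparkSession exposed by duckdb.experimental.spark.sql and DataFrame helpers that mirror their PySpark counterparts, and the table data and column names are purely illustrative:

    from duckdb.experimental.spark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()  # backed by an in-memory DuckDB connection
    df = spark.createDataFrame([("Tom", 80), ("Alice", None)], ["name", "height"])

    # Attribute access on the DataFrame yields Column objects; operators build Expressions.
    taller = (df.height + 1).alias("height_plus_one")  # _bin_op("__add__"), then alias()
    df.select(df.name, taller).orderBy(df.height.desc_nulls_last()).show()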
duckdb/experimental/spark/sql/conf.py
@@ -0,0 +1,24 @@
+ from typing import Optional, Union  # noqa: D100
+
+ from duckdb import DuckDBPyConnection
+ from duckdb.experimental.spark._globals import _NoValue, _NoValueType
+
+
+ class RuntimeConfig:  # noqa: D101
+     def __init__(self, connection: DuckDBPyConnection) -> None:  # noqa: D107
+         self._connection = connection
+
+     def set(self, key: str, value: str) -> None:  # noqa: D102
+         raise NotImplementedError
+
+     def isModifiable(self, key: str) -> bool:  # noqa: D102
+         raise NotImplementedError
+
+     def unset(self, key: str) -> None:  # noqa: D102
+         raise NotImplementedError
+
+     def get(self, key: str, default: Union[Optional[str], _NoValueType] = _NoValue) -> str:  # noqa: D102
+         raise NotImplementedError
+
+
+ __all__ = ["RuntimeConfig"]
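
Every RuntimeConfig method above is a placeholder that raises NotImplementedError, so configuration calls through the Spark shim fail loudly rather than silently no-op. The sketch below shows what a caller should expect; it assumes, as in PySpark, that the session exposes this class via a conf attribute, and the configuration key is hypothetical:

    from duckdb.experimental.spark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    try:
        spark.conf.set("spark.sql.shuffle.partitions", "4")  # any key behaves the same today
    except NotImplementedError:
        print("RuntimeConfig.set is not implemented in duckdb's Spark shim yet")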