duckdb-1.5.0.dev32-cp314-cp314-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of duckdb might be problematic.

Files changed (47)
  1. _duckdb.cpython-314-darwin.so +0 -0
  2. duckdb/__init__.py +475 -0
  3. duckdb/__init__.pyi +713 -0
  4. duckdb/bytes_io_wrapper.py +66 -0
  5. duckdb/experimental/__init__.py +2 -0
  6. duckdb/experimental/spark/LICENSE +260 -0
  7. duckdb/experimental/spark/__init__.py +7 -0
  8. duckdb/experimental/spark/_globals.py +77 -0
  9. duckdb/experimental/spark/_typing.py +48 -0
  10. duckdb/experimental/spark/conf.py +45 -0
  11. duckdb/experimental/spark/context.py +164 -0
  12. duckdb/experimental/spark/errors/__init__.py +72 -0
  13. duckdb/experimental/spark/errors/error_classes.py +918 -0
  14. duckdb/experimental/spark/errors/exceptions/__init__.py +16 -0
  15. duckdb/experimental/spark/errors/exceptions/base.py +217 -0
  16. duckdb/experimental/spark/errors/utils.py +116 -0
  17. duckdb/experimental/spark/exception.py +15 -0
  18. duckdb/experimental/spark/sql/__init__.py +7 -0
  19. duckdb/experimental/spark/sql/_typing.py +93 -0
  20. duckdb/experimental/spark/sql/catalog.py +78 -0
  21. duckdb/experimental/spark/sql/column.py +368 -0
  22. duckdb/experimental/spark/sql/conf.py +23 -0
  23. duckdb/experimental/spark/sql/dataframe.py +1437 -0
  24. duckdb/experimental/spark/sql/functions.py +6221 -0
  25. duckdb/experimental/spark/sql/group.py +420 -0
  26. duckdb/experimental/spark/sql/readwriter.py +449 -0
  27. duckdb/experimental/spark/sql/session.py +292 -0
  28. duckdb/experimental/spark/sql/streaming.py +37 -0
  29. duckdb/experimental/spark/sql/type_utils.py +105 -0
  30. duckdb/experimental/spark/sql/types.py +1275 -0
  31. duckdb/experimental/spark/sql/udf.py +37 -0
  32. duckdb/filesystem.py +23 -0
  33. duckdb/functional/__init__.py +17 -0
  34. duckdb/functional/__init__.pyi +31 -0
  35. duckdb/polars_io.py +237 -0
  36. duckdb/query_graph/__main__.py +363 -0
  37. duckdb/typing/__init__.py +61 -0
  38. duckdb/typing/__init__.pyi +36 -0
  39. duckdb/udf.py +19 -0
  40. duckdb/value/__init__.py +0 -0
  41. duckdb/value/__init__.pyi +0 -0
  42. duckdb/value/constant/__init__.py +268 -0
  43. duckdb/value/constant/__init__.pyi +115 -0
  44. duckdb-1.5.0.dev32.dist-info/METADATA +326 -0
  45. duckdb-1.5.0.dev32.dist-info/RECORD +47 -0
  46. duckdb-1.5.0.dev32.dist-info/WHEEL +6 -0
  47. duckdb-1.5.0.dev32.dist-info/licenses/LICENSE +7 -0
duckdb/experimental/spark/sql/column.py
@@ -0,0 +1,368 @@
+ from typing import Union, TYPE_CHECKING, Any, cast, Callable, Tuple
+ from ..exception import ContributionsAcceptedError
+
+ from .types import DataType
+
+ if TYPE_CHECKING:
+     from ._typing import ColumnOrName, LiteralType, DecimalLiteral, DateTimeLiteral
+
+ from duckdb import ConstantExpression, ColumnExpression, FunctionExpression, Expression
+
+ from duckdb.typing import DuckDBPyType
+
+ __all__ = ["Column"]
+
+
+ def _get_expr(x) -> Expression:
+     return x.expr if isinstance(x, Column) else ConstantExpression(x)
+
+
+ def _func_op(name: str, doc: str = "") -> Callable[["Column"], "Column"]:
+     def _(self: "Column") -> "Column":
+         njc = getattr(self.expr, name)()
+         return Column(njc)
+
+     _.__doc__ = doc
+     return _
+
+
+ def _unary_op(
+     name: str,
+     doc: str = "unary operator",
+ ) -> Callable[["Column"], "Column"]:
+     """Create a method for the given unary operator"""
+
+     def _(self: "Column") -> "Column":
+         # Call the function identified by 'name' on the internal Expression object
+         expr = getattr(self.expr, name)()
+         return Column(expr)
+
+     _.__doc__ = doc
+     return _
+
+
+ def _bin_op(
+     name: str,
+     doc: str = "binary operator",
+ ) -> Callable[["Column", Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]], "Column"]:
+     """Create a method for the given binary operator"""
+
+     def _(
+         self: "Column",
+         other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"],
+     ) -> "Column":
+         jc = _get_expr(other)
+         njc = getattr(self.expr, name)(jc)
+         return Column(njc)
+
+     _.__doc__ = doc
+     return _
+
+
+ def _bin_func(
+     name: str,
+     doc: str = "binary function",
+ ) -> Callable[["Column", Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]], "Column"]:
+     """Create a function expression for the given binary function"""
+
+     def _(
+         self: "Column",
+         other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"],
+     ) -> "Column":
+         other = _get_expr(other)
+         func = FunctionExpression(name, self.expr, other)
+         return Column(func)
+
+     _.__doc__ = doc
+     return _
+
+
+ class Column:
+     """
+     A column in a DataFrame.
+
+     :class:`Column` instances can be created by::
+
+         # 1. Select a column out of a DataFrame
+
+         df.colName
+         df["colName"]
+
+         # 2. Create from an expression
+         df.colName + 1
+         1 / df.colName
+
+     .. versionadded:: 1.3.0
+     """
+
+     def __init__(self, expr: Expression):
+         self.expr = expr
+
+     # arithmetic operators
+     def __neg__(self):
+         return Column(-self.expr)
+
+     # `and`, `or`, `not` cannot be overloaded in Python,
+     # so use bitwise operators as boolean operators
+     __and__ = _bin_op("__and__")
+     __or__ = _bin_op("__or__")
+     __invert__ = _func_op("__invert__")
+     __rand__ = _bin_op("__rand__")
+     __ror__ = _bin_op("__ror__")
+
+     __add__ = _bin_op("__add__")
+
+     __sub__ = _bin_op("__sub__")
+
+     __mul__ = _bin_op("__mul__")
+
+     __div__ = _bin_op("__div__")
+
+     __truediv__ = _bin_op("__truediv__")
+
+     __mod__ = _bin_op("__mod__")
+
+     __pow__ = _bin_op("__pow__")
+
+     __radd__ = _bin_op("__radd__")
+
+     __rsub__ = _bin_op("__rsub__")
+
+     __rmul__ = _bin_op("__rmul__")
+
+     __rdiv__ = _bin_op("__rdiv__")
+
+     __rtruediv__ = _bin_op("__rtruediv__")
+
+     __rmod__ = _bin_op("__rmod__")
+
+     __rpow__ = _bin_op("__rpow__")
+
+     def __getitem__(self, k: Any) -> "Column":
+         """
+         An expression that gets an item at position ``ordinal`` out of a list,
+         or gets an item by key out of a dict.
+
+         .. versionadded:: 1.3.0
+
+         .. versionchanged:: 3.4.0
+             Supports Spark Connect.
+
+         Parameters
+         ----------
+         k
+             a literal value, or a slice object without step.
+
+         Returns
+         -------
+         :class:`Column`
+             Column representing the item got by key out of a dict, or substrings sliced by
+             the given slice object.
+
+         Examples
+         --------
+         >>> df = spark.createDataFrame([('abcedfg', {"key": "value"})], ["l", "d"])
+         >>> df.select(df.l[slice(1, 3)], df.d['key']).show()
+         +------------------+------+
+         |substring(l, 1, 3)|d[key]|
+         +------------------+------+
+         |               abc| value|
+         +------------------+------+
+         """
+         if isinstance(k, slice):
+             raise ContributionsAcceptedError
+             # if k.step is not None:
+             #     raise ValueError("Using a slice with a step value is not supported")
+             # return self.substr(k.start, k.stop)
+         else:
+             # FIXME: this is super hacky
+             expr_str = str(self.expr) + "." + str(k)
+             return Column(ColumnExpression(expr_str))
+
+     def __getattr__(self, item: Any) -> "Column":
+         """
+         An expression that gets an item at position ``ordinal`` out of a list,
+         or gets an item by key out of a dict.
+
+         Parameters
+         ----------
+         item
+             a literal value.
+
+         Returns
+         -------
+         :class:`Column`
+             Column representing the item got by key out of a dict.
+
+         Examples
+         --------
+         >>> df = spark.createDataFrame([('abcedfg', {"key": "value"})], ["l", "d"])
+         >>> df.select(df.d.key).show()
+         +------+
+         |d[key]|
+         +------+
+         | value|
+         +------+
+         """
+         if item.startswith("__"):
+             raise AttributeError("Cannot access __ (dunder) methods")
+         return self[item]
+
+     def alias(self, alias: str):
+         return Column(self.expr.alias(alias))
+
+     def when(self, condition: "Column", value: Any):
+         if not isinstance(condition, Column):
+             raise TypeError("condition should be a Column")
+         v = _get_expr(value)
+         expr = self.expr.when(condition.expr, v)
+         return Column(expr)
+
+     def otherwise(self, value: Any):
+         v = _get_expr(value)
+         expr = self.expr.otherwise(v)
+         return Column(expr)
+
+     def cast(self, dataType: Union[DataType, str]) -> "Column":
+         if isinstance(dataType, str):
+             # Try to construct a default DuckDBPyType from it
+             internal_type = DuckDBPyType(dataType)
+         else:
+             internal_type = dataType.duckdb_type
+         return Column(self.expr.cast(internal_type))
+
+     def isin(self, *cols: Any) -> "Column":
+         if len(cols) == 1 and isinstance(cols[0], (list, set)):
+             # Only one argument supplied, it's a list
+             cols = cast(Tuple, cols[0])
+
+         cols = cast(
+             Tuple,
+             [_get_expr(c) for c in cols],
+         )
+         return Column(self.expr.isin(*cols))
+
+     # comparison operators
+     def __eq__(  # type: ignore[override]
+         self,
+         other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"],
+     ) -> "Column":
+         """binary function"""
+         return Column(self.expr == (_get_expr(other)))
+
+     def __ne__(  # type: ignore[override]
+         self,
+         other: Any,
+     ) -> "Column":
+         """binary function"""
+         return Column(self.expr != (_get_expr(other)))
+
+     __lt__ = _bin_op("__lt__")
+
+     __le__ = _bin_op("__le__")
+
+     __ge__ = _bin_op("__ge__")
+
+     __gt__ = _bin_op("__gt__")
+
+     # String interrogation methods
+
+     contains = _bin_func("contains")
+     rlike = _bin_func("regexp_matches")
+     like = _bin_func("~~")
+     ilike = _bin_func("~~*")
+     startswith = _bin_func("starts_with")
+     endswith = _bin_func("suffix")
+
+     # order
+     _asc_doc = """
+     Returns a sort expression based on the ascending order of the column.
+
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.asc()).collect()
+     [Row(name='Alice'), Row(name='Tom')]
+     """
+
+     _asc_nulls_first_doc = """
+     Returns a sort expression based on ascending order of the column, and null values
+     appear before non-null values.
+
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.asc_nulls_first()).collect()
+     [Row(name=None), Row(name='Alice'), Row(name='Tom')]
+     """
+
+     _asc_nulls_last_doc = """
+     Returns a sort expression based on ascending order of the column, and null values
+     appear after non-null values.
+
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.asc_nulls_last()).collect()
+     [Row(name='Alice'), Row(name='Tom'), Row(name=None)]
+     """
+
+     _desc_doc = """
+     Returns a sort expression based on the descending order of the column.
+
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.desc()).collect()
+     [Row(name='Tom'), Row(name='Alice')]
+     """
+
+     _desc_nulls_first_doc = """
+     Returns a sort expression based on the descending order of the column, and null values
+     appear before non-null values.
+
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.desc_nulls_first()).collect()
+     [Row(name=None), Row(name='Tom'), Row(name='Alice')]
+     """
+
+     _desc_nulls_last_doc = """
+     Returns a sort expression based on the descending order of the column, and null values
+     appear after non-null values.
+
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.desc_nulls_last()).collect()
+     [Row(name='Tom'), Row(name='Alice'), Row(name=None)]
+     """
+
+     asc = _unary_op("asc", _asc_doc)
+     desc = _unary_op("desc", _desc_doc)
+     nulls_first = _unary_op("nulls_first")
+     nulls_last = _unary_op("nulls_last")
+
+     def asc_nulls_first(self) -> "Column":
+         return self.asc().nulls_first()
+
+     def asc_nulls_last(self) -> "Column":
+         return self.asc().nulls_last()
+
+     def desc_nulls_first(self) -> "Column":
+         return self.desc().nulls_first()
+
+     def desc_nulls_last(self) -> "Column":
+         return self.desc().nulls_last()
+
+     def isNull(self) -> "Column":
+         return Column(self.expr.isnull())
+
+     def isNotNull(self) -> "Column":
+         return Column(self.expr.isnotnull())
+
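Of note in column.py: Column is a thin wrapper around duckdb's Expression API, and the _func_op/_bin_op/_bin_func factories turn Expression methods and DuckDB SQL functions into Column methods. A minimal sketch of how the pieces compose (illustrative only; the relation and column names below are invented for the example, not part of the wheel):

    import duckdb
    from duckdb import ColumnExpression
    from duckdb.experimental.spark.sql.column import Column

    rel = duckdb.sql("SELECT 'Tom' AS name, 80 AS height")

    # __add__ is generated by _bin_op: the literal 1 is wrapped in a
    # ConstantExpression and forwarded to Expression.__add__.
    c = Column(ColumnExpression("height"))
    taller = (c + 1).alias("height_plus_one")

    # Column.expr is a plain duckdb Expression, so it can be handed
    # straight to the relational API.
    rel.select(taller.expr).show()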
duckdb/experimental/spark/sql/conf.py
@@ -0,0 +1,23 @@
+ from typing import Optional, Union
+ from duckdb.experimental.spark._globals import _NoValueType, _NoValue
+ from duckdb import DuckDBPyConnection
+
+
+ class RuntimeConfig:
+     def __init__(self, connection: DuckDBPyConnection):
+         self._connection = connection
+
+     def set(self, key: str, value: str) -> None:
+         raise NotImplementedError
+
+     def isModifiable(self, key: str) -> bool:
+         raise NotImplementedError
+
+     def unset(self, key: str) -> None:
+         raise NotImplementedError
+
+     def get(self, key: str, default: Union[Optional[str], _NoValueType] = _NoValue) -> str:
+         raise NotImplementedError
+
+
+ __all__ = ["RuntimeConfig"]
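RuntimeConfig in this build is stubs only: every method raises NotImplementedError. For orientation, here is a sketch of what set/get could look like on top of DuckDB's own settings machinery; NaiveRuntimeConfig is a hypothetical name, it assumes every key maps directly to a DuckDB setting, and the SET interpolation does no quoting or validation:

    import duckdb

    class NaiveRuntimeConfig:
        """Hypothetical sketch: back Spark-style conf calls with DuckDB settings."""

        def __init__(self, connection: duckdb.DuckDBPyConnection):
            self._connection = connection

        def set(self, key: str, value: str) -> None:
            # DuckDB's SET statement changes a setting for the current session.
            # Naive string interpolation: fine for a sketch, unsafe for real input.
            self._connection.execute(f"SET {key} = '{value}'")

        def get(self, key: str) -> str:
            # duckdb_settings() is a table function listing every setting
            # together with its current value.
            row = self._connection.execute(
                "SELECT value FROM duckdb_settings() WHERE name = ?", [key]
            ).fetchone()
            if row is None:
                raise KeyError(key)
            return row[0]

    conf = NaiveRuntimeConfig(duckdb.connect())
    conf.set("threads", "4")
    print(conf.get("threads"))  # prints the session's thread setting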