duckdb 1.4.1.dev125__cp310-cp310-win_amd64.whl → 1.5.0.dev44__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of duckdb might be problematic.

Files changed (48)
  1. _duckdb.cp310-win_amd64.pyd +0 -0
  2. duckdb/__init__.py +374 -373
  3. duckdb/__init__.pyi +180 -604
  4. duckdb/bytes_io_wrapper.py +7 -6
  5. duckdb/experimental/__init__.py +1 -2
  6. duckdb/experimental/spark/__init__.py +4 -3
  7. duckdb/experimental/spark/_globals.py +8 -8
  8. duckdb/experimental/spark/_typing.py +9 -7
  9. duckdb/experimental/spark/conf.py +15 -16
  10. duckdb/experimental/spark/context.py +44 -60
  11. duckdb/experimental/spark/errors/__init__.py +35 -33
  12. duckdb/experimental/spark/errors/error_classes.py +1 -1
  13. duckdb/experimental/spark/errors/exceptions/__init__.py +1 -1
  14. duckdb/experimental/spark/errors/exceptions/base.py +88 -39
  15. duckdb/experimental/spark/errors/utils.py +16 -11
  16. duckdb/experimental/spark/exception.py +6 -9
  17. duckdb/experimental/spark/sql/__init__.py +5 -5
  18. duckdb/experimental/spark/sql/_typing.py +15 -8
  19. duckdb/experimental/spark/sql/catalog.py +20 -21
  20. duckdb/experimental/spark/sql/column.py +54 -47
  21. duckdb/experimental/spark/sql/conf.py +8 -9
  22. duckdb/experimental/spark/sql/dataframe.py +233 -185
  23. duckdb/experimental/spark/sql/functions.py +1248 -1222
  24. duckdb/experimental/spark/sql/group.py +52 -56
  25. duckdb/experimental/spark/sql/readwriter.py +94 -80
  26. duckdb/experimental/spark/sql/session.py +59 -64
  27. duckdb/experimental/spark/sql/streaming.py +10 -9
  28. duckdb/experimental/spark/sql/type_utils.py +64 -66
  29. duckdb/experimental/spark/sql/types.py +344 -308
  30. duckdb/experimental/spark/sql/udf.py +6 -6
  31. duckdb/filesystem.py +8 -13
  32. duckdb/functional/__init__.py +16 -2
  33. duckdb/polars_io.py +57 -66
  34. duckdb/query_graph/__main__.py +96 -91
  35. duckdb/typing/__init__.py +8 -8
  36. duckdb/typing/__init__.pyi +2 -4
  37. duckdb/udf.py +5 -10
  38. duckdb/value/__init__.py +0 -1
  39. duckdb/value/constant/__init__.py +59 -61
  40. duckdb/value/constant/__init__.pyi +4 -3
  41. duckdb-1.5.0.dev44.dist-info/METADATA +80 -0
  42. duckdb-1.5.0.dev44.dist-info/RECORD +47 -0
  43. adbc_driver_duckdb/__init__.py +0 -50
  44. adbc_driver_duckdb/dbapi.py +0 -115
  45. duckdb-1.4.1.dev125.dist-info/METADATA +0 -326
  46. duckdb-1.4.1.dev125.dist-info/RECORD +0 -49
  47. {duckdb-1.4.1.dev125.dist-info → duckdb-1.5.0.dev44.dist-info}/WHEEL +0 -0
  48. {duckdb-1.4.1.dev125.dist-info → duckdb-1.5.0.dev44.dist-info}/licenses/LICENSE +0 -0
duckdb/experimental/spark/sql/udf.py CHANGED
@@ -1,4 +1,4 @@
- # https://sparkbyexamples.com/pyspark/pyspark-udf-user-defined-function/  # noqa: D100
+ # https://sparkbyexamples.com/pyspark/pyspark-udf-user-defined-function/
  from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
 
  from .types import DataType
@@ -10,11 +10,11 @@ DataTypeOrString = Union[DataType, str]
  UserDefinedFunctionLike = TypeVar("UserDefinedFunctionLike")
 
 
- class UDFRegistration:  # noqa: D101
-     def __init__(self, sparkSession: "SparkSession") -> None:  # noqa: D107
+ class UDFRegistration:
+     def __init__(self, sparkSession: "SparkSession"):
          self.sparkSession = sparkSession
 
-     def register(  # noqa: D102
+     def register(
          self,
          name: str,
          f: Union[Callable[..., Any], "UserDefinedFunctionLike"],
@@ -22,7 +22,7 @@ class UDFRegistration:  # noqa: D101
      ) -> "UserDefinedFunctionLike":
          self.sparkSession.conn.create_function(name, f, return_type=returnType)
 
-     def registerJavaFunction(  # noqa: D102
+     def registerJavaFunction(
          self,
          name: str,
          javaClassName: str,
@@ -30,7 +30,7 @@ class UDFRegistration:  # noqa: D101
      ) -> None:
          raise NotImplementedError
 
-     def registerJavaUDAF(self, name: str, javaClassName: str) -> None:  # noqa: D102
+     def registerJavaUDAF(self, name: str, javaClassName: str) -> None:
          raise NotImplementedError
 
 
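Under the hood, UDFRegistration.register simply forwards to create_function on the session's DuckDB connection. A minimal sketch of that underlying call against a plain connection (the plus_one function is illustrative, not part of the package):

    import duckdb

    def plus_one(x: int) -> int:
        # Type annotations let DuckDB infer parameter and return types (BIGINT here).
        return x + 1

    con = duckdb.connect()
    con.create_function("plus_one", plus_one)
    con.sql("SELECT plus_one(41) AS v").show()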
duckdb/filesystem.py CHANGED
@@ -1,26 +1,21 @@
- from io import TextIOBase  # noqa: D100
- from typing import IO
-
- from fsspec import AbstractFileSystem
- from fsspec.implementations.memory import MemoryFile, MemoryFileSystem
-
+ from fsspec import filesystem, AbstractFileSystem
+ from fsspec.implementations.memory import MemoryFileSystem, MemoryFile
  from .bytes_io_wrapper import BytesIOWrapper
+ from io import TextIOBase
 
-
- def is_file_like(obj) -> bool:  # noqa: D103, ANN001
+ def is_file_like(obj):
      # We only care that we can read from the file
      return hasattr(obj, "read") and hasattr(obj, "seek")
 
 
- class ModifiedMemoryFileSystem(MemoryFileSystem):  # noqa: D101
-     protocol = ("DUCKDB_INTERNAL_OBJECTSTORE",)
+ class ModifiedMemoryFileSystem(MemoryFileSystem):
+     protocol = ('DUCKDB_INTERNAL_OBJECTSTORE',)
      # defer to the original implementation that doesn't hardcode the protocol
      _strip_protocol = classmethod(AbstractFileSystem._strip_protocol.__func__)
 
-     def add_file(self, object: IO, path: str) -> None:  # noqa: D102
+     def add_file(self, object, path):
          if not is_file_like(object):
-             msg = "Can not read from a non file-like object"
-             raise ValueError(msg)
+             raise ValueError("Can not read from a non file-like object")
          path = self._strip_protocol(path)
          if isinstance(object, TextIOBase):
              # Wrap this so that we can return a bytes object from 'read'
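ModifiedMemoryFileSystem is the fsspec-backed in-memory store DuckDB registers internally (under the DUCKDB_INTERNAL_OBJECTSTORE protocol) to expose Python file-like objects. A hedged sketch of the add_file behavior shown above, with a made-up buffer and path:

    import io
    from duckdb.filesystem import ModifiedMemoryFileSystem

    fs = ModifiedMemoryFileSystem()

    # Objects exposing read() and seek() pass the is_file_like check.
    buf = io.BytesIO(b"a,b\n1,2\n")
    fs.add_file(buf, "example.csv")

    # Anything else is rejected up front.
    try:
        fs.add_file("not a file object", "bad.csv")
    except ValueError as exc:
        print(exc)  # Can not read from a non file-like object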
duckdb/functional/__init__.py CHANGED
@@ -1,3 +1,17 @@
- from _duckdb.functional import ARROW, DEFAULT, NATIVE, SPECIAL, FunctionNullHandling, PythonUDFType  # noqa: D104
+ from _duckdb.functional import (
+     FunctionNullHandling,
+     PythonUDFType,
+     SPECIAL,
+     DEFAULT,
+     NATIVE,
+     ARROW
+ )
 
- __all__ = ["ARROW", "DEFAULT", "NATIVE", "SPECIAL", "FunctionNullHandling", "PythonUDFType"]
+ __all__ = [
+     "FunctionNullHandling",
+     "PythonUDFType",
+     "SPECIAL",
+     "DEFAULT",
+     "NATIVE",
+     "ARROW"
+ ]
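These re-exports control how a Python UDF runs (NATIVE row-at-a-time vs ARROW vectorized) and how NULLs are delivered to it (DEFAULT vs SPECIAL). A hedged sketch of passing them to create_function, assuming the keyword names used by recent DuckDB releases; the double_it function is illustrative:

    import duckdb
    import pyarrow.compute as pc
    from duckdb.functional import PythonUDFType, FunctionNullHandling
    from duckdb.typing import BIGINT

    def double_it(v):
        # With an ARROW-typed UDF the argument arrives as a PyArrow array.
        return pc.multiply(v, 2)

    con = duckdb.connect()
    con.create_function(
        "double_it",
        double_it,
        [BIGINT],
        BIGINT,
        type=PythonUDFType.ARROW,
        null_handling=FunctionNullHandling.SPECIAL,  # NULLs reach the UDF instead of short-circuiting
    )
    con.sql("SELECT double_it(21) AS v").show()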
duckdb/polars_io.py CHANGED
@@ -1,22 +1,20 @@
- import datetime  # noqa: D100
- import json
- from collections.abc import Iterator
- from decimal import Decimal
- from typing import Optional
-
+ import duckdb
  import polars as pl
- from polars.io.plugins import register_io_source
+ from typing import Iterator, Optional
 
- import duckdb
+ from polars.io.plugins import register_io_source
  from duckdb import SQLExpression
-
+ import json
+ from decimal import Decimal
+ import datetime
 
  def _predicate_to_expression(predicate: pl.Expr) -> Optional[SQLExpression]:
-     """Convert a Polars predicate expression to a DuckDB-compatible SQL expression.
-
+     """
+     Convert a Polars predicate expression to a DuckDB-compatible SQL expression.
+
      Parameters:
          predicate (pl.Expr): A Polars expression (e.g., col("foo") > 5)
-
+
      Returns:
          SQLExpression: A DuckDB SQL expression string equivalent.
          None: If conversion fails.
@@ -27,19 +25,20 @@ def _predicate_to_expression(predicate: pl.Expr) -> Optional[SQLExpression]:
      """
      # Serialize the Polars expression tree to JSON
      tree = json.loads(predicate.meta.serialize(format="json"))
-
+
      try:
          # Convert the tree to SQL
          sql_filter = _pl_tree_to_sql(tree)
          return SQLExpression(sql_filter)
-     except Exception:
+     except:
          # If the conversion fails, we return None
          return None
 
 
  def _pl_operation_to_sql(op: str) -> str:
-     """Map Polars binary operation strings to SQL equivalents.
-
+     """
+     Map Polars binary operation strings to SQL equivalents.
+
      Example:
          >>> _pl_operation_to_sql("Eq")
          '='
@@ -56,11 +55,12 @@ def _pl_operation_to_sql(op: str) -> str:
              "Or": "OR",
          }[op]
      except KeyError:
-         raise NotImplementedError(op)  # noqa: B904
+         raise NotImplementedError(op)
 
 
  def _escape_sql_identifier(identifier: str) -> str:
-     """Escape SQL identifiers by doubling any double quotes and wrapping in double quotes.
+     """
+     Escape SQL identifiers by doubling any double quotes and wrapping in double quotes.
 
      Example:
          >>> _escape_sql_identifier('column"name')
@@ -71,14 +71,15 @@
 
 
  def _pl_tree_to_sql(tree: dict) -> str:
-     """Recursively convert a Polars expression tree (as JSON) to a SQL string.
-
+     """
+     Recursively convert a Polars expression tree (as JSON) to a SQL string.
+
      Parameters:
          tree (dict): JSON-deserialized expression tree from Polars
-
+
      Returns:
          str: SQL expression string
-
+
      Example:
          Input tree:
          {
@@ -96,15 +97,13 @@ def _pl_tree_to_sql(tree: dict) -> str:
      if node_type == "BinaryExpr":
          # Binary expressions: left OP right
          return (
-             "("
-             + " ".join(
-                 (
-                     _pl_tree_to_sql(subtree["left"]),
-                     _pl_operation_to_sql(subtree["op"]),
-                     _pl_tree_to_sql(subtree["right"]),
-                 )
-             )
-             + ")"
+             "(" +
+             " ".join((
+                 _pl_tree_to_sql(subtree['left']),
+                 _pl_operation_to_sql(subtree['op']),
+                 _pl_tree_to_sql(subtree['right'])
+             )) +
+             ")"
          )
      if node_type == "Column":
          # A reference to a column name
@@ -132,11 +131,9 @@ def _pl_tree_to_sql(tree: dict) -> str:
                  return f"({arg_sql} IS NULL)"
              if func == "IsNotNull":
                  return f"({arg_sql} IS NOT NULL)"
-             msg = f"Boolean function not supported: {func}"
-             raise NotImplementedError(msg)
+             raise NotImplementedError(f"Boolean function not supported: {func}")
 
-         msg = f"Unsupported function type: {func_dict}"
-         raise NotImplementedError(msg)
+         raise NotImplementedError(f"Unsupported function type: {func_dict}")
 
      if node_type == "Scalar":
          # Detect format: old style (dtype/value) or new style (direct type key)
@@ -150,30 +147,20 @@ def _pl_tree_to_sql(tree: dict) -> str:
 
          # Decimal support
          if dtype.startswith("{'Decimal'") or dtype == "Decimal":
-             decimal_value = value["Decimal"]
+             decimal_value = value['Decimal']
              decimal_value = Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[1])
              return str(decimal_value)
 
          # Datetime with microseconds since epoch
          if dtype.startswith("{'Datetime'") or dtype == "Datetime":
-             micros = value["Datetime"][0]
+             micros = value['Datetime'][0]
              dt_timestamp = datetime.datetime.fromtimestamp(micros / 1_000_000, tz=datetime.UTC)
-             return f"'{dt_timestamp!s}'::TIMESTAMP"
+             return f"'{str(dt_timestamp)}'::TIMESTAMP"
 
          # Match simple numeric/boolean types
-         if dtype in (
-             "Int8",
-             "Int16",
-             "Int32",
-             "Int64",
-             "UInt8",
-             "UInt16",
-             "UInt32",
-             "UInt64",
-             "Float32",
-             "Float64",
-             "Boolean",
-         ):
+         if dtype in ("Int8", "Int16", "Int32", "Int64",
+                      "UInt8", "UInt16", "UInt32", "UInt64",
+                      "Float32", "Float64", "Boolean"):
              return str(value[dtype])
 
          # Time type
@@ -181,7 +168,9 @@ def _pl_tree_to_sql(tree: dict) -> str:
              nanoseconds = value["Time"]
              seconds = nanoseconds // 1_000_000_000
              microseconds = (nanoseconds % 1_000_000_000) // 1_000
-             dt_time = (datetime.datetime.min + datetime.timedelta(seconds=seconds, microseconds=microseconds)).time()
+             dt_time = (datetime.datetime.min + datetime.timedelta(
+                 seconds=seconds, microseconds=microseconds
+             )).time()
              return f"'{dt_time}'::TIME"
 
          # Date type
@@ -193,7 +182,7 @@ def _pl_tree_to_sql(tree: dict) -> str:
          # Binary type
          if dtype == "Binary":
              binary_data = bytes(value["Binary"])
-             escaped = "".join(f"\\x{b:02x}" for b in binary_data)
+             escaped = ''.join(f'\\x{b:02x}' for b in binary_data)
              return f"'{escaped}'::BLOB"
 
          # String type
@@ -202,16 +191,15 @@ def _pl_tree_to_sql(tree: dict) -> str:
              string_val = value.get("StringOwned", value.get("String", None))
              return f"'{string_val}'"
 
-         msg = f"Unsupported scalar type {dtype!s}, with value {value}"
-         raise NotImplementedError(msg)
+         raise NotImplementedError(f"Unsupported scalar type {str(dtype)}, with value {value}")
 
-     msg = f"Node type: {node_type} is not implemented. {subtree}"
-     raise NotImplementedError(msg)
+     raise NotImplementedError(f"Node type: {node_type} is not implemented. {subtree}")
 
 
  def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -> pl.LazyFrame:
-     """A polars IO plugin for DuckDB."""
-
+     """
+     A polars IO plugin for DuckDB.
+     """
      def source_generator(
          with_columns: Optional[list[str]],
          predicate: Optional[pl.Expr],
@@ -235,12 +223,15 @@ def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -> pl.LazyFrame:
              results = relation_final.fetch_arrow_reader()
          else:
              results = relation_final.fetch_arrow_reader(batch_size)
-
-         for record_batch in iter(results.read_next_batch, None):
-             if predicate is not None and duck_predicate is None:
-                 # We have a predicate, but did not manage to push it down, we fallback here
-                 yield pl.from_arrow(record_batch).filter(predicate)
-             else:
-                 yield pl.from_arrow(record_batch)
+         while True:
+             try:
+                 record_batch = results.read_next_batch()
+                 if predicate is not None and duck_predicate is None:
+                     # We have a predicate, but did not manage to push it down, we fallback here
+                     yield pl.from_arrow(record_batch).filter(predicate)
+                 else:
+                     yield pl.from_arrow(record_batch)
+             except StopIteration:
+                 break
 
      return register_io_source(source_generator, schema=schema)
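Taken together, duckdb_source registers a Polars IO plugin whose generator streams Arrow record batches out of a DuckDB relation, while _predicate_to_expression tries to translate a Polars filter into SQL so DuckDB can apply it during the scan, falling back to filtering in Polars when translation fails. A hedged usage sketch, assuming the module is importable as duckdb.polars_io and that the schema matches the relation:

    import duckdb
    import polars as pl
    from duckdb.polars_io import duckdb_source

    rel = duckdb.sql("SELECT range AS i FROM range(1000)")
    lf = duckdb_source(rel, pl.Schema({"i": pl.Int64}))

    # The filter serializes to roughly BinaryExpr(Column("i"), Gt, Scalar(990));
    # _pl_tree_to_sql renders that as ("i" > 990), which is pushed into the DuckDB scan.
    print(lf.filter(pl.col("i") > 990).collect())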