duckdb-1.4.1.dev141-cp310-cp310-macosx_10_9_universal2.whl → duckdb-1.5.0.dev44-cp310-cp310-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of duckdb has been flagged for review.
Files changed (57)
  1. _duckdb.cpython-310-darwin.so +0 -0
  2. duckdb/__init__.py +435 -341
  3. duckdb/__init__.pyi +713 -0
  4. duckdb/bytes_io_wrapper.py +9 -12
  5. duckdb/experimental/__init__.py +1 -2
  6. duckdb/experimental/spark/__init__.py +4 -3
  7. duckdb/experimental/spark/_globals.py +8 -8
  8. duckdb/experimental/spark/_typing.py +9 -7
  9. duckdb/experimental/spark/conf.py +15 -16
  10. duckdb/experimental/spark/context.py +44 -60
  11. duckdb/experimental/spark/errors/__init__.py +35 -33
  12. duckdb/experimental/spark/errors/error_classes.py +1 -1
  13. duckdb/experimental/spark/errors/exceptions/__init__.py +1 -1
  14. duckdb/experimental/spark/errors/exceptions/base.py +88 -39
  15. duckdb/experimental/spark/errors/utils.py +16 -11
  16. duckdb/experimental/spark/exception.py +6 -9
  17. duckdb/experimental/spark/sql/__init__.py +5 -5
  18. duckdb/experimental/spark/sql/_typing.py +15 -8
  19. duckdb/experimental/spark/sql/catalog.py +20 -21
  20. duckdb/experimental/spark/sql/column.py +55 -48
  21. duckdb/experimental/spark/sql/conf.py +8 -9
  22. duckdb/experimental/spark/sql/dataframe.py +233 -185
  23. duckdb/experimental/spark/sql/functions.py +1248 -1222
  24. duckdb/experimental/spark/sql/group.py +52 -56
  25. duckdb/experimental/spark/sql/readwriter.py +94 -80
  26. duckdb/experimental/spark/sql/session.py +59 -64
  27. duckdb/experimental/spark/sql/streaming.py +10 -9
  28. duckdb/experimental/spark/sql/type_utils.py +65 -67
  29. duckdb/experimental/spark/sql/types.py +345 -309
  30. duckdb/experimental/spark/sql/udf.py +6 -6
  31. duckdb/filesystem.py +16 -26
  32. duckdb/functional/__init__.py +16 -12
  33. duckdb/functional/__init__.pyi +31 -0
  34. duckdb/polars_io.py +83 -130
  35. duckdb/query_graph/__main__.py +96 -91
  36. duckdb/typing/__init__.py +8 -18
  37. duckdb/typing/__init__.pyi +36 -0
  38. duckdb/udf.py +5 -10
  39. duckdb/value/__init__.py +0 -1
  40. duckdb/value/constant/__init__.py +60 -62
  41. duckdb/value/constant/__init__.pyi +115 -0
  42. duckdb-1.5.0.dev44.dist-info/METADATA +80 -0
  43. duckdb-1.5.0.dev44.dist-info/RECORD +47 -0
  44. _duckdb-stubs/__init__.pyi +0 -1443
  45. _duckdb-stubs/_func.pyi +0 -46
  46. _duckdb-stubs/_sqltypes.pyi +0 -75
  47. adbc_driver_duckdb/__init__.py +0 -50
  48. adbc_driver_duckdb/dbapi.py +0 -115
  49. duckdb/_dbapi_type_object.py +0 -231
  50. duckdb/_version.py +0 -22
  51. duckdb/func/__init__.py +0 -3
  52. duckdb/sqltypes/__init__.py +0 -63
  53. duckdb-1.4.1.dev141.dist-info/METADATA +0 -326
  54. duckdb-1.4.1.dev141.dist-info/RECORD +0 -52
  55. /duckdb/{py.typed → value/__init__.pyi} +0 -0
  56. {duckdb-1.4.1.dev141.dist-info → duckdb-1.5.0.dev44.dist-info}/WHEEL +0 -0
  57. {duckdb-1.4.1.dev141.dist-info → duckdb-1.5.0.dev44.dist-info}/licenses/LICENSE +0 -0
duckdb/experimental/spark/sql/udf.py CHANGED
@@ -1,4 +1,4 @@
-# https://sparkbyexamples.com/pyspark/pyspark-udf-user-defined-function/ # noqa: D100
+# https://sparkbyexamples.com/pyspark/pyspark-udf-user-defined-function/
 from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
 
 from .types import DataType
@@ -10,11 +10,11 @@ DataTypeOrString = Union[DataType, str]
 UserDefinedFunctionLike = TypeVar("UserDefinedFunctionLike")
 
 
-class UDFRegistration: # noqa: D101
-    def __init__(self, sparkSession: "SparkSession") -> None: # noqa: D107
+class UDFRegistration:
+    def __init__(self, sparkSession: "SparkSession"):
         self.sparkSession = sparkSession
 
-    def register( # noqa: D102
+    def register(
         self,
         name: str,
         f: Union[Callable[..., Any], "UserDefinedFunctionLike"],
@@ -22,7 +22,7 @@ class UDFRegistration: # noqa: D101
     ) -> "UserDefinedFunctionLike":
         self.sparkSession.conn.create_function(name, f, return_type=returnType)
 
-    def registerJavaFunction( # noqa: D102
+    def registerJavaFunction(
         self,
         name: str,
         javaClassName: str,
@@ -30,7 +30,7 @@ class UDFRegistration: # noqa: D101
     ) -> None:
         raise NotImplementedError
 
-    def registerJavaUDAF(self, name: str, javaClassName: str) -> None: # noqa: D102
+    def registerJavaUDAF(self, name: str, javaClassName: str) -> None:
         raise NotImplementedError
 
 
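For context, a minimal usage sketch of the register() path above. It assumes the experimental Spark session exposes a udf attribute backed by UDFRegistration (as PySpark does) and that a DuckDB SQL type string is acceptable for returnType, since register() forwards it straight to the connection's create_function(); the function name and query are invented.

from duckdb.experimental.spark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# hypothetical UDF; parameter annotations let DuckDB infer the argument type
def plus_one(x: int) -> int:
    return x + 1

# register() forwards to the underlying DuckDB connection's create_function()
spark.udf.register("plus_one", plus_one, returnType="BIGINT")

spark.sql("SELECT plus_one(41) AS answer").show()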
duckdb/filesystem.py CHANGED
@@ -1,33 +1,23 @@
-"""In-memory filesystem to store ephemeral dependencies.
-
-Warning: Not for external use. May change at any moment. Likely to be made internal.
-"""
-
-from __future__ import annotations
-
-import io
-import typing
-
-from fsspec import AbstractFileSystem
-from fsspec.implementations.memory import MemoryFile, MemoryFileSystem
-
+from fsspec import filesystem, AbstractFileSystem
+from fsspec.implementations.memory import MemoryFileSystem, MemoryFile
 from .bytes_io_wrapper import BytesIOWrapper
+from io import TextIOBase
 
+def is_file_like(obj):
+    # We only care that we can read from the file
+    return hasattr(obj, "read") and hasattr(obj, "seek")
 
-class ModifiedMemoryFileSystem(MemoryFileSystem):
-    """In-memory filesystem implementation that uses its own protocol."""
 
-    protocol = ("DUCKDB_INTERNAL_OBJECTSTORE",)
+class ModifiedMemoryFileSystem(MemoryFileSystem):
+    protocol = ('DUCKDB_INTERNAL_OBJECTSTORE',)
     # defer to the original implementation that doesn't hardcode the protocol
-    _strip_protocol: typing.Callable[[str], str] = classmethod(AbstractFileSystem._strip_protocol.__func__)  # type: ignore[assignment]
+    _strip_protocol = classmethod(AbstractFileSystem._strip_protocol.__func__)
 
-    def add_file(self, obj: io.IOBase | BytesIOWrapper | object, path: str) -> None:
-        """Add a file to the filesystem."""
-        if not (hasattr(obj, "read") and hasattr(obj, "seek")):
-            msg = "Can not read from a non file-like object"
-            raise TypeError(msg)
-        if isinstance(obj, io.TextIOBase):
-            # Wrap this so that we can return a bytes object from 'read'
-            obj = BytesIOWrapper(obj)
+    def add_file(self, object, path):
+        if not is_file_like(object):
+            raise ValueError("Can not read from a non file-like object")
         path = self._strip_protocol(path)
-        self.store[path] = MemoryFile(self, path, obj.read())
+        if isinstance(object, TextIOBase):
+            # Wrap this so that we can return a bytes object from 'read'
+            object = BytesIOWrapper(object)
+        self.store[path] = MemoryFile(self, path, object.read())
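To make the new add_file() flow concrete, a small sketch (not part of the diff) of how a text handle is wrapped and stored; the StringIO content and path are invented for illustration, and the read-back goes through fsspec's standard MemoryFileSystem machinery.

import io

from duckdb.filesystem import ModifiedMemoryFileSystem

fs = ModifiedMemoryFileSystem()
handle = io.StringIO("a,b\n1,2\n")   # TextIOBase, so add_file wraps it in BytesIOWrapper
fs.add_file(handle, "example.csv")   # stored as a MemoryFile under the stripped path

# read it back through the normal fsspec API; bytes come from the wrapped handle
with fs.open("example.csv", "rb") as f:
    print(f.read())                  # b'a,b\n1,2\n'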
duckdb/functional/__init__.py CHANGED
@@ -1,13 +1,17 @@
-"""DuckDB function constants and types. DEPRECATED: please use `duckdb.func` instead."""
-
-import warnings
-
-from duckdb.func import ARROW, DEFAULT, NATIVE, SPECIAL, FunctionNullHandling, PythonUDFType
-
-__all__ = ["ARROW", "DEFAULT", "NATIVE", "SPECIAL", "FunctionNullHandling", "PythonUDFType"]
-
-warnings.warn(
-    "`duckdb.functional` is deprecated and will be removed in a future version. Please use `duckdb.func` instead.",
-    DeprecationWarning,
-    stacklevel=2,
+from _duckdb.functional import (
+    FunctionNullHandling,
+    PythonUDFType,
+    SPECIAL,
+    DEFAULT,
+    NATIVE,
+    ARROW
 )
+
+__all__ = [
+    "FunctionNullHandling",
+    "PythonUDFType",
+    "SPECIAL",
+    "DEFAULT",
+    "NATIVE",
+    "ARROW"
+]
duckdb/functional/__init__.pyi ADDED
@@ -0,0 +1,31 @@
+from typing import Dict
+
+SPECIAL: FunctionNullHandling
+DEFAULT: FunctionNullHandling
+
+NATIVE: PythonUDFType
+ARROW: PythonUDFType
+
+class FunctionNullHandling:
+    DEFAULT: FunctionNullHandling
+    SPECIAL: FunctionNullHandling
+    def __int__(self) -> int: ...
+    def __index__(self) -> int: ...
+    @property
+    def __members__(self) -> Dict[str, FunctionNullHandling]: ...
+    @property
+    def name(self) -> str: ...
+    @property
+    def value(self) -> int: ...
+
+class PythonUDFType:
+    NATIVE: PythonUDFType
+    ARROW: PythonUDFType
+    def __int__(self) -> int: ...
+    def __index__(self) -> int: ...
+    @property
+    def __members__(self) -> Dict[str, PythonUDFType]: ...
+    @property
+    def name(self) -> str: ...
+    @property
+    def value(self) -> int: ...
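A quick, hedged check of the re-exported constants against the stub above. The name/value accesses follow the stub directly; the create_function call assumes DuckDB's documented type keyword accepts the PythonUDFType enum (or the strings "native"/"arrow"), and the UDF name is made up.

import duckdb
from duckdb.functional import PythonUDFType, FunctionNullHandling

print(PythonUDFType.NATIVE.name, PythonUDFType.ARROW.value)   # enum-style name/value per the stub
print(FunctionNullHandling.SPECIAL.name)

con = duckdb.connect()

def my_len(s: str) -> int:
    return len(s)

# assumption: the `type` keyword takes the enum re-exported by duckdb.functional
con.create_function("my_len", my_len, ["VARCHAR"], "BIGINT", type=PythonUDFType.NATIVE)
print(con.sql("SELECT my_len('duck')").fetchall())   # [(4,)]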
duckdb/polars_io.py CHANGED
@@ -1,30 +1,20 @@
-from __future__ import annotations  # noqa: D100
+import duckdb
+import polars as pl
+from typing import Iterator, Optional
 
-import contextlib
-import datetime
+from polars.io.plugins import register_io_source
+from duckdb import SQLExpression
 import json
-import typing
 from decimal import Decimal
+import datetime
 
-import polars as pl
-from polars.io.plugins import register_io_source
-
-import duckdb
-
-if typing.TYPE_CHECKING:
-    from collections.abc import Iterator
-
-    import typing_extensions
-
-    _ExpressionTree: typing_extensions.TypeAlias = typing.Dict[str, typing.Union[str, int, "_ExpressionTree", typing.Any]]  # noqa: UP006
-
-
-def _predicate_to_expression(predicate: pl.Expr) -> duckdb.Expression | None:
-    """Convert a Polars predicate expression to a DuckDB-compatible SQL expression.
-
+def _predicate_to_expression(predicate: pl.Expr) -> Optional[SQLExpression]:
+    """
+    Convert a Polars predicate expression to a DuckDB-compatible SQL expression.
+
     Parameters:
         predicate (pl.Expr): A Polars expression (e.g., col("foo") > 5)
-
+
     Returns:
         SQLExpression: A DuckDB SQL expression string equivalent.
         None: If conversion fails.
@@ -35,19 +25,20 @@ def _predicate_to_expression(predicate: pl.Expr) -> duckdb.Expression | None:
     """
     # Serialize the Polars expression tree to JSON
     tree = json.loads(predicate.meta.serialize(format="json"))
-
+
     try:
         # Convert the tree to SQL
         sql_filter = _pl_tree_to_sql(tree)
-        return duckdb.SQLExpression(sql_filter)
-    except Exception:
+        return SQLExpression(sql_filter)
+    except:
         # If the conversion fails, we return None
         return None
 
 
 def _pl_operation_to_sql(op: str) -> str:
-    """Map Polars binary operation strings to SQL equivalents.
-
+    """
+    Map Polars binary operation strings to SQL equivalents.
+
     Example:
         >>> _pl_operation_to_sql("Eq")
         '='
@@ -64,11 +55,12 @@ def _pl_operation_to_sql(op: str) -> str:
             "Or": "OR",
         }[op]
     except KeyError:
-        raise NotImplementedError(op)  # noqa: B904
+        raise NotImplementedError(op)
 
 
 def _escape_sql_identifier(identifier: str) -> str:
-    """Escape SQL identifiers by doubling any double quotes and wrapping in double quotes.
+    """
+    Escape SQL identifiers by doubling any double quotes and wrapping in double quotes.
 
     Example:
         >>> _escape_sql_identifier('column"name')
@@ -78,15 +70,16 @@ def _escape_sql_identifier(identifier: str) -> str:
     return f'"{escaped}"'
 
 
-def _pl_tree_to_sql(tree: _ExpressionTree) -> str:
-    """Recursively convert a Polars expression tree (as JSON) to a SQL string.
-
+def _pl_tree_to_sql(tree: dict) -> str:
+    """
+    Recursively convert a Polars expression tree (as JSON) to a SQL string.
+
     Parameters:
         tree (dict): JSON-deserialized expression tree from Polars
-
+
     Returns:
         str: SQL expression string
-
+
     Example:
         Input tree:
         {
@@ -99,51 +92,36 @@ def _pl_tree_to_sql(tree: _ExpressionTree) -> str:
         Output: "(foo > 5)"
     """
     [node_type] = tree.keys()
+    subtree = tree[node_type]
 
     if node_type == "BinaryExpr":
         # Binary expressions: left OP right
-        bin_expr_tree = tree[node_type]
-        assert isinstance(bin_expr_tree, dict), f"A {node_type} should be a dict but got {type(bin_expr_tree)}"
-        lhs, op, rhs = bin_expr_tree["left"], bin_expr_tree["op"], bin_expr_tree["right"]
-        assert isinstance(lhs, dict), f"LHS of a {node_type} should be a dict but got {type(lhs)}"
-        assert isinstance(op, str), f"The op of a {node_type} should be a str but got {type(op)}"
-        assert isinstance(rhs, dict), f"RHS of a {node_type} should be a dict but got {type(rhs)}"
-        return f"({_pl_tree_to_sql(lhs)} {_pl_operation_to_sql(op)} {_pl_tree_to_sql(rhs)})"
+        return (
+            "(" +
+            " ".join((
+                _pl_tree_to_sql(subtree['left']),
+                _pl_operation_to_sql(subtree['op']),
+                _pl_tree_to_sql(subtree['right'])
+            )) +
+            ")"
+        )
     if node_type == "Column":
         # A reference to a column name
         # Wrap in quotes to handle special characters
-        col_name = tree[node_type]
-        assert isinstance(col_name, str), f"The col name of a {node_type} should be a str but got {type(col_name)}"
-        return _escape_sql_identifier(col_name)
+        return _escape_sql_identifier(subtree)
 
     if node_type in ("Literal", "Dyn"):
         # Recursively process dynamic or literal values
-        val_tree = tree[node_type]
-        assert isinstance(val_tree, dict), f"A {node_type} should be a dict but got {type(val_tree)}"
-        return _pl_tree_to_sql(val_tree)
+        return _pl_tree_to_sql(subtree)
 
     if node_type == "Int":
         # Direct integer literals
-        int_literal = tree[node_type]
-        assert isinstance(int_literal, (int, str)), (
-            f"The value of an Int should be an int or str but got {type(int_literal)}"
-        )
-        return str(int_literal)
+        return str(subtree)
 
     if node_type == "Function":
         # Handle boolean functions like IsNull, IsNotNull
-        func_tree = tree[node_type]
-        assert isinstance(func_tree, dict), f"A {node_type} should be a dict but got {type(func_tree)}"
-        inputs = func_tree["input"]
-        assert isinstance(inputs, list), f"A {node_type} should have a list of dicts as input but got {type(inputs)}"
-        input_tree = inputs[0]
-        assert isinstance(input_tree, dict), (
-            f"A {node_type} should have a list of dicts as input but got {type(input_tree)}"
-        )
-        func_dict = func_tree["function"]
-        assert isinstance(func_dict, dict), (
-            f"A {node_type} should have a function dict as input but got {type(func_dict)}"
-        )
+        inputs = subtree["input"]
+        func_dict = subtree["function"]
 
         if "Boolean" in func_dict:
             func = func_dict["Boolean"]
@@ -153,107 +131,80 @@ def _pl_tree_to_sql(tree: _ExpressionTree) -> str:
                 return f"({arg_sql} IS NULL)"
             if func == "IsNotNull":
                 return f"({arg_sql} IS NOT NULL)"
-            msg = f"Boolean function not supported: {func}"
-            raise NotImplementedError(msg)
+            raise NotImplementedError(f"Boolean function not supported: {func}")
 
-        msg = f"Unsupported function type: {func_dict}"
-        raise NotImplementedError(msg)
+        raise NotImplementedError(f"Unsupported function type: {func_dict}")
 
     if node_type == "Scalar":
         # Detect format: old style (dtype/value) or new style (direct type key)
-        scalar_tree = tree[node_type]
-        assert isinstance(scalar_tree, dict), f"A {node_type} should be a dict but got {type(scalar_tree)}"
-        if "dtype" in scalar_tree and "value" in scalar_tree:
-            dtype = str(scalar_tree["dtype"])
-            value = scalar_tree["value"]
+        if "dtype" in subtree and "value" in subtree:
+            dtype = str(subtree["dtype"])
+            value = subtree["value"]
        else:
            # New style: dtype is the single key in the dict
-            dtype = next(iter(scalar_tree.keys()))
-            value = scalar_tree
-        assert isinstance(dtype, str), f"A {node_type} should have a str dtype but got {type(dtype)}"
-        assert isinstance(value, dict), f"A {node_type} should have a dict value but got {type(value)}"
+            dtype = next(iter(subtree.keys()))
+            value = subtree
 
         # Decimal support
         if dtype.startswith("{'Decimal'") or dtype == "Decimal":
-            decimal_value = value["Decimal"]
-            assert isinstance(decimal_value, list), (
-                f"A {dtype} should be a two or three member list but got {type(decimal_value)}"
-            )
-            assert 2 <= len(decimal_value) <= 3, (
-                f"A {dtype} should be a two or three member list but got {len(decimal_value)} member list"
-            )
-            return str(Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[-1]))
+            decimal_value = value['Decimal']
+            decimal_value = Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[1])
+            return str(decimal_value)
 
         # Datetime with microseconds since epoch
         if dtype.startswith("{'Datetime'") or dtype == "Datetime":
-            micros = value["Datetime"]
-            assert isinstance(micros, list), f"A {dtype} should be a one member list but got {type(micros)}"
-            dt_timestamp = datetime.datetime.fromtimestamp(micros[0] / 1_000_000, tz=datetime.timezone.utc)
-            return f"'{dt_timestamp!s}'::TIMESTAMP"
+            micros = value['Datetime'][0]
+            dt_timestamp = datetime.datetime.fromtimestamp(micros / 1_000_000, tz=datetime.UTC)
+            return f"'{str(dt_timestamp)}'::TIMESTAMP"
 
         # Match simple numeric/boolean types
-        if dtype in (
-            "Int8",
-            "Int16",
-            "Int32",
-            "Int64",
-            "UInt8",
-            "UInt16",
-            "UInt32",
-            "UInt64",
-            "Float32",
-            "Float64",
-            "Boolean",
-        ):
+        if dtype in ("Int8", "Int16", "Int32", "Int64",
+                     "UInt8", "UInt16", "UInt32", "UInt64",
+                     "Float32", "Float64", "Boolean"):
            return str(value[dtype])
 
         # Time type
         if dtype == "Time":
            nanoseconds = value["Time"]
-            assert isinstance(nanoseconds, int), f"A {dtype} should be an int but got {type(nanoseconds)}"
            seconds = nanoseconds // 1_000_000_000
            microseconds = (nanoseconds % 1_000_000_000) // 1_000
-            dt_time = (datetime.datetime.min + datetime.timedelta(seconds=seconds, microseconds=microseconds)).time()
+            dt_time = (datetime.datetime.min + datetime.timedelta(
+                seconds=seconds, microseconds=microseconds
+            )).time()
            return f"'{dt_time}'::TIME"
 
         # Date type
         if dtype == "Date":
            days_since_epoch = value["Date"]
-            assert isinstance(days_since_epoch, (float, int)), (
-                f"A {dtype} should be a number but got {type(days_since_epoch)}"
-            )
            date = datetime.date(1970, 1, 1) + datetime.timedelta(days=days_since_epoch)
            return f"'{date}'::DATE"
 
         # Binary type
         if dtype == "Binary":
-            bin_value = value["Binary"]
-            assert isinstance(bin_value, list), f"A {dtype} should be a list but got {type(bin_value)}"
-            binary_data = bytes(bin_value)
-            escaped = "".join(f"\\x{b:02x}" for b in binary_data)
+            binary_data = bytes(value["Binary"])
+            escaped = ''.join(f'\\x{b:02x}' for b in binary_data)
            return f"'{escaped}'::BLOB"
 
         # String type
         if dtype == "String" or dtype == "StringOwned":
            # Some new formats may store directly under StringOwned
-            string_val: object | None = value.get("StringOwned", value.get("String", None))
+            string_val = value.get("StringOwned", value.get("String", None))
            return f"'{string_val}'"
 
-        msg = f"Unsupported scalar type {dtype!s}, with value {value}"
-        raise NotImplementedError(msg)
+        raise NotImplementedError(f"Unsupported scalar type {str(dtype)}, with value {value}")
 
-    msg = f"Node type: {node_type} is not implemented. {tree[node_type]}"
-    raise NotImplementedError(msg)
+    raise NotImplementedError(f"Node type: {node_type} is not implemented. {subtree}")
 
 
 def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -> pl.LazyFrame:
-    """A polars IO plugin for DuckDB."""
-
+    """
+    A polars IO plugin for DuckDB.
+    """
    def source_generator(
-        with_columns: list[str] | None,
-        predicate: pl.Expr | None,
-        n_rows: int | None,
-        batch_size: int | None,
+        with_columns: Optional[list[str]],
+        predicate: Optional[pl.Expr],
+        n_rows: Optional[int],
+        batch_size: Optional[int],
    ) -> Iterator[pl.DataFrame]:
        duck_predicate = None
        relation_final = relation
@@ -264,8 +215,7 @@ def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -> pl.LazyFrame:
            relation_final = relation_final.limit(n_rows)
        if predicate is not None:
            # We have a predicate, if possible, we push it down to DuckDB
-            with contextlib.suppress(AssertionError, KeyError):
-                duck_predicate = _predicate_to_expression(predicate)
+            duck_predicate = _predicate_to_expression(predicate)
        # Try to pushdown filter, if one exists
        if duck_predicate is not None:
            relation_final = relation_final.filter(duck_predicate)
@@ -273,12 +223,15 @@ def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -> pl.LazyFrame:
            results = relation_final.fetch_arrow_reader()
        else:
            results = relation_final.fetch_arrow_reader(batch_size)
-
-        for record_batch in iter(results.read_next_batch, None):
-            if predicate is not None and duck_predicate is None:
-                # We have a predicate, but did not manage to push it down, we fallback here
-                yield pl.from_arrow(record_batch).filter(predicate)  # type: ignore[arg-type,misc]
-            else:
-                yield pl.from_arrow(record_batch)  # type: ignore[misc]
+        while True:
+            try:
+                record_batch = results.read_next_batch()
+                if predicate is not None and duck_predicate is None:
+                    # We have a predicate, but did not manage to push it down, we fallback here
+                    yield pl.from_arrow(record_batch).filter(predicate)
+                else:
+                    yield pl.from_arrow(record_batch)
+            except StopIteration:
+                break
 
     return register_io_source(source_generator, schema=schema)
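Finally, an illustrative sketch (not from the diff) of the plugin flow this file implements: wrap a relation as a LazyFrame via duckdb_source and let a simple comparison be pushed down through _predicate_to_expression. The table, column, and schema below are made up for the example.

import duckdb
import polars as pl

from duckdb.polars_io import duckdb_source

con = duckdb.connect()
rel = con.sql("SELECT * FROM range(10) t(foo)")   # one BIGINT column named foo

lf = duckdb_source(rel, schema=pl.Schema({"foo": pl.Int64()}))

# col("foo") > 5 serializes to a BinaryExpr tree that _pl_tree_to_sql turns into ("foo" > 5),
# so the filter runs inside DuckDB rather than on the materialized batches
print(lf.filter(pl.col("foo") > 5).collect())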