duckdb-1.4.0.dev2849-cp312-cp312-win_amd64.whl → duckdb-1.4.2.dev26-cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of duckdb might be problematic.
Files changed (58)
  1. _duckdb-stubs/__init__.pyi +1443 -0
  2. _duckdb-stubs/_func.pyi +46 -0
  3. _duckdb-stubs/_sqltypes.pyi +75 -0
  4. duckdb/duckdb.cp312-win_amd64.pyd → _duckdb.cp312-win_amd64.pyd +0 -0
  5. adbc_driver_duckdb/__init__.py +11 -8
  6. adbc_driver_duckdb/dbapi.py +2 -3
  7. duckdb/__init__.py +343 -388
  8. duckdb/_dbapi_type_object.py +231 -0
  9. duckdb/_version.py +22 -0
  10. duckdb/bytes_io_wrapper.py +12 -9
  11. duckdb/experimental/__init__.py +2 -1
  12. duckdb/experimental/spark/__init__.py +3 -4
  13. duckdb/experimental/spark/_globals.py +8 -8
  14. duckdb/experimental/spark/_typing.py +7 -9
  15. duckdb/experimental/spark/conf.py +16 -15
  16. duckdb/experimental/spark/context.py +60 -44
  17. duckdb/experimental/spark/errors/__init__.py +33 -35
  18. duckdb/experimental/spark/errors/error_classes.py +1 -1
  19. duckdb/experimental/spark/errors/exceptions/__init__.py +1 -1
  20. duckdb/experimental/spark/errors/exceptions/base.py +39 -88
  21. duckdb/experimental/spark/errors/utils.py +11 -16
  22. duckdb/experimental/spark/exception.py +9 -6
  23. duckdb/experimental/spark/sql/__init__.py +5 -5
  24. duckdb/experimental/spark/sql/_typing.py +8 -15
  25. duckdb/experimental/spark/sql/catalog.py +21 -20
  26. duckdb/experimental/spark/sql/column.py +48 -55
  27. duckdb/experimental/spark/sql/conf.py +9 -8
  28. duckdb/experimental/spark/sql/dataframe.py +213 -231
  29. duckdb/experimental/spark/sql/functions.py +1317 -1220
  30. duckdb/experimental/spark/sql/group.py +56 -52
  31. duckdb/experimental/spark/sql/readwriter.py +80 -94
  32. duckdb/experimental/spark/sql/session.py +64 -59
  33. duckdb/experimental/spark/sql/streaming.py +9 -10
  34. duckdb/experimental/spark/sql/type_utils.py +67 -65
  35. duckdb/experimental/spark/sql/types.py +309 -345
  36. duckdb/experimental/spark/sql/udf.py +6 -6
  37. duckdb/filesystem.py +26 -16
  38. duckdb/func/__init__.py +3 -0
  39. duckdb/functional/__init__.py +12 -16
  40. duckdb/polars_io.py +141 -82
  41. duckdb/query_graph/__main__.py +91 -96
  42. duckdb/sqltypes/__init__.py +63 -0
  43. duckdb/typing/__init__.py +18 -8
  44. duckdb/udf.py +10 -5
  45. duckdb/value/__init__.py +1 -0
  46. duckdb/value/{constant.py → constant/__init__.py} +62 -60
  47. duckdb-1.4.2.dev26.dist-info/METADATA +88 -0
  48. duckdb-1.4.2.dev26.dist-info/RECORD +52 -0
  49. {duckdb-1.4.0.dev2849.dist-info → duckdb-1.4.2.dev26.dist-info}/WHEEL +1 -1
  50. duckdb-1.4.2.dev26.dist-info/licenses/LICENSE +7 -0
  51. duckdb-1.4.0.dev2849.dist-info/METADATA +0 -47
  52. duckdb-1.4.0.dev2849.dist-info/RECORD +0 -48
  53. duckdb-1.4.0.dev2849.dist-info/top_level.txt +0 -3
  54. duckdb-stubs/__init__.pyi +0 -712
  55. duckdb-stubs/functional/__init__.pyi +0 -33
  56. duckdb-stubs/typing/__init__.pyi +0 -37
  57. duckdb-stubs/value/constant/__init__.pyi +0 -116
  58. /duckdb-stubs/value/__init__.pyi → /duckdb/py.typed +0 -0
duckdb/experimental/spark/sql/udf.py CHANGED
@@ -1,4 +1,4 @@
-# https://sparkbyexamples.com/pyspark/pyspark-udf-user-defined-function/
+# https://sparkbyexamples.com/pyspark/pyspark-udf-user-defined-function/  # noqa: D100
 from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
 
 from .types import DataType
@@ -10,11 +10,11 @@ DataTypeOrString = Union[DataType, str]
 UserDefinedFunctionLike = TypeVar("UserDefinedFunctionLike")
 
 
-class UDFRegistration:
-    def __init__(self, sparkSession: "SparkSession"):
+class UDFRegistration:  # noqa: D101
+    def __init__(self, sparkSession: "SparkSession") -> None:  # noqa: D107
         self.sparkSession = sparkSession
 
-    def register(
+    def register(  # noqa: D102
         self,
         name: str,
         f: Union[Callable[..., Any], "UserDefinedFunctionLike"],
@@ -22,7 +22,7 @@ class UDFRegistration:
     ) -> "UserDefinedFunctionLike":
         self.sparkSession.conn.create_function(name, f, return_type=returnType)
 
-    def registerJavaFunction(
+    def registerJavaFunction(  # noqa: D102
         self,
        name: str,
         javaClassName: str,
@@ -30,7 +30,7 @@ class UDFRegistration:
     ) -> None:
         raise NotImplementedError
 
-    def registerJavaUDAF(self, name: str, javaClassName: str) -> None:
+    def registerJavaUDAF(self, name: str, javaClassName: str) -> None:  # noqa: D102
         raise NotImplementedError
 
 
duckdb/filesystem.py CHANGED
@@ -1,23 +1,33 @@
-from fsspec import filesystem, AbstractFileSystem
-from fsspec.implementations.memory import MemoryFileSystem, MemoryFile
-from .bytes_io_wrapper import BytesIOWrapper
-from io import TextIOBase
+"""In-memory filesystem to store ephemeral dependencies.
+
+Warning: Not for external use. May change at any moment. Likely to be made internal.
+"""
+
+from __future__ import annotations
 
-def is_file_like(obj):
-    # We only care that we can read from the file
-    return hasattr(obj, "read") and hasattr(obj, "seek")
+import io
+import typing
+
+from fsspec import AbstractFileSystem
+from fsspec.implementations.memory import MemoryFile, MemoryFileSystem
+
+from .bytes_io_wrapper import BytesIOWrapper
 
 
 class ModifiedMemoryFileSystem(MemoryFileSystem):
-    protocol = ('DUCKDB_INTERNAL_OBJECTSTORE',)
+    """In-memory filesystem implementation that uses its own protocol."""
+
+    protocol = ("DUCKDB_INTERNAL_OBJECTSTORE",)
     # defer to the original implementation that doesn't hardcode the protocol
-    _strip_protocol = classmethod(AbstractFileSystem._strip_protocol.__func__)
+    _strip_protocol: typing.Callable[[str], str] = classmethod(AbstractFileSystem._strip_protocol.__func__)  # type: ignore[assignment]
 
-    def add_file(self, object, path):
-        if not is_file_like(object):
-            raise ValueError("Can not read from a non file-like object")
-        path = self._strip_protocol(path)
-        if isinstance(object, TextIOBase):
+    def add_file(self, obj: io.IOBase | BytesIOWrapper | object, path: str) -> None:
+        """Add a file to the filesystem."""
+        if not (hasattr(obj, "read") and hasattr(obj, "seek")):
+            msg = "Can not read from a non file-like object"
+            raise TypeError(msg)
+        if isinstance(obj, io.TextIOBase):
             # Wrap this so that we can return a bytes object from 'read'
-            object = BytesIOWrapper(object)
-        self.store[path] = MemoryFile(self, path, object.read())
+            obj = BytesIOWrapper(obj)
+        path = self._strip_protocol(path)
+        self.store[path] = MemoryFile(self, path, obj.read())
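As a usage note, the rewritten `add_file` now rejects non file-like objects with `TypeError` (previously `ValueError`) and wraps text handles in `BytesIOWrapper` so the stored `MemoryFile` always receives bytes. A small sketch of that behaviour; the class is flagged as internal, so treat this purely as illustration:

    import io

    from duckdb.filesystem import ModifiedMemoryFileSystem

    fs = ModifiedMemoryFileSystem()

    # A text handle is wrapped so that read() yields bytes before being stored.
    fs.add_file(io.StringIO("a,b\n1,2\n"), "DUCKDB_INTERNAL_OBJECTSTORE://data.csv")

    # Objects without read()/seek() are rejected.
    try:
        fs.add_file(object(), "not-a-file")
    except TypeError as exc:
        print(exc)  # Can not read from a non file-like object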
duckdb/func/__init__.py ADDED
@@ -0,0 +1,3 @@
+from _duckdb._func import ARROW, DEFAULT, NATIVE, SPECIAL, FunctionNullHandling, PythonUDFType  # noqa: D104
+
+__all__ = ["ARROW", "DEFAULT", "NATIVE", "SPECIAL", "FunctionNullHandling", "PythonUDFType"]
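The new `duckdb.func` module simply re-exports the UDF-related enums and aliases from the `_duckdb._func` extension module. A hedged sketch of how these constants are typically passed to `create_function`; the keyword names `type` and `null_handling` follow the current Python API and are an assumption, not shown in this diff:

    import duckdb
    from duckdb.func import FunctionNullHandling, PythonUDFType

    con = duckdb.connect()

    def shout(s: str) -> str:
        # With SPECIAL null handling the UDF sees NULLs instead of them being short-circuited.
        return "<null>" if s is None else s.upper()

    con.create_function(
        "shout",
        shout,
        type=PythonUDFType.NATIVE,
        null_handling=FunctionNullHandling.SPECIAL,
    )
    print(con.sql("SELECT shout(NULL), shout('hi')").fetchone())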
duckdb/functional/__init__.py CHANGED
@@ -1,17 +1,13 @@
-from duckdb.duckdb.functional import (
-    FunctionNullHandling,
-    PythonUDFType,
-    SPECIAL,
-    DEFAULT,
-    NATIVE,
-    ARROW
-)
+"""DuckDB function constants and types. DEPRECATED: please use `duckdb.func` instead."""
+
+import warnings
+
+from duckdb.func import ARROW, DEFAULT, NATIVE, SPECIAL, FunctionNullHandling, PythonUDFType
 
-__all__ = [
-    "FunctionNullHandling",
-    "PythonUDFType",
-    "SPECIAL",
-    "DEFAULT",
-    "NATIVE",
-    "ARROW"
-]
+__all__ = ["ARROW", "DEFAULT", "NATIVE", "SPECIAL", "FunctionNullHandling", "PythonUDFType"]
+
+warnings.warn(
+    "`duckdb.functional` is deprecated and will be removed in a future version. Please use `duckdb.func` instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
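Because the warning fires at import time, downstream code can check that it still goes through the shim; a small sketch grounded in the `warnings.warn` call above:

    import warnings

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        import duckdb.functional  # noqa: F401  # emits the DeprecationWarning added above
        # Note: the warning only fires the first time the module is imported in a process.

    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

    # Preferred imports going forward:
    from duckdb.func import FunctionNullHandling, PythonUDFType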
duckdb/polars_io.py CHANGED
@@ -1,20 +1,30 @@
-import duckdb
-import polars as pl
-from typing import Iterator, Optional
+from __future__ import annotations  # noqa: D100
 
-from polars.io.plugins import register_io_source
-from duckdb import SQLExpression
+import contextlib
+import datetime
 import json
+import typing
 from decimal import Decimal
-import datetime
 
-def _predicate_to_expression(predicate: pl.Expr) -> Optional[SQLExpression]:
-    """
-    Convert a Polars predicate expression to a DuckDB-compatible SQL expression.
-
+import polars as pl
+from polars.io.plugins import register_io_source
+
+import duckdb
+
+if typing.TYPE_CHECKING:
+    from collections.abc import Iterator
+
+    import typing_extensions
+
+    _ExpressionTree: typing_extensions.TypeAlias = typing.Dict[str, typing.Union[str, int, "_ExpressionTree", typing.Any]]  # noqa: UP006
+
+
+def _predicate_to_expression(predicate: pl.Expr) -> duckdb.Expression | None:
+    """Convert a Polars predicate expression to a DuckDB-compatible SQL expression.
+
     Parameters:
         predicate (pl.Expr): A Polars expression (e.g., col("foo") > 5)
-
+
     Returns:
         SQLExpression: A DuckDB SQL expression string equivalent.
         None: If conversion fails.
@@ -25,20 +35,19 @@ def _predicate_to_expression(predicate: pl.Expr) -> Optional[SQLExpression]:
     """
     # Serialize the Polars expression tree to JSON
     tree = json.loads(predicate.meta.serialize(format="json"))
-
+
     try:
         # Convert the tree to SQL
         sql_filter = _pl_tree_to_sql(tree)
-        return SQLExpression(sql_filter)
-    except:
+        return duckdb.SQLExpression(sql_filter)
+    except Exception:
         # If the conversion fails, we return None
         return None
 
 
 def _pl_operation_to_sql(op: str) -> str:
-    """
-    Map Polars binary operation strings to SQL equivalents.
-
+    """Map Polars binary operation strings to SQL equivalents.
+
     Example:
         >>> _pl_operation_to_sql("Eq")
         '='
@@ -55,19 +64,29 @@
             "Or": "OR",
         }[op]
     except KeyError:
-        raise NotImplementedError(op)
+        raise NotImplementedError(op)  # noqa: B904
+
 
+def _escape_sql_identifier(identifier: str) -> str:
+    """Escape SQL identifiers by doubling any double quotes and wrapping in double quotes.
 
-def _pl_tree_to_sql(tree: dict) -> str:
+    Example:
+        >>> _escape_sql_identifier('column"name')
+        '"column""name"'
     """
-    Recursively convert a Polars expression tree (as JSON) to a SQL string.
-
+    escaped = identifier.replace('"', '""')
+    return f'"{escaped}"'
+
+
+def _pl_tree_to_sql(tree: _ExpressionTree) -> str:
+    """Recursively convert a Polars expression tree (as JSON) to a SQL string.
+
     Parameters:
         tree (dict): JSON-deserialized expression tree from Polars
-
+
     Returns:
         str: SQL expression string
-
+
     Example:
         Input tree:
         {
@@ -80,35 +99,51 @@
         Output: "(foo > 5)"
     """
     [node_type] = tree.keys()
-    subtree = tree[node_type]
 
     if node_type == "BinaryExpr":
         # Binary expressions: left OP right
-        return (
-            "(" +
-            " ".join((
-                _pl_tree_to_sql(subtree['left']),
-                _pl_operation_to_sql(subtree['op']),
-                _pl_tree_to_sql(subtree['right'])
-            )) +
-            ")"
-        )
+        bin_expr_tree = tree[node_type]
+        assert isinstance(bin_expr_tree, dict), f"A {node_type} should be a dict but got {type(bin_expr_tree)}"
+        lhs, op, rhs = bin_expr_tree["left"], bin_expr_tree["op"], bin_expr_tree["right"]
+        assert isinstance(lhs, dict), f"LHS of a {node_type} should be a dict but got {type(lhs)}"
+        assert isinstance(op, str), f"The op of a {node_type} should be a str but got {type(op)}"
+        assert isinstance(rhs, dict), f"RHS of a {node_type} should be a dict but got {type(rhs)}"
+        return f"({_pl_tree_to_sql(lhs)} {_pl_operation_to_sql(op)} {_pl_tree_to_sql(rhs)})"
     if node_type == "Column":
         # A reference to a column name
-        return subtree
+        # Wrap in quotes to handle special characters
+        col_name = tree[node_type]
+        assert isinstance(col_name, str), f"The col name of a {node_type} should be a str but got {type(col_name)}"
+        return _escape_sql_identifier(col_name)
 
     if node_type in ("Literal", "Dyn"):
         # Recursively process dynamic or literal values
-        return _pl_tree_to_sql(subtree)
+        val_tree = tree[node_type]
+        assert isinstance(val_tree, dict), f"A {node_type} should be a dict but got {type(val_tree)}"
+        return _pl_tree_to_sql(val_tree)
 
     if node_type == "Int":
         # Direct integer literals
-        return str(subtree)
+        int_literal = tree[node_type]
+        assert isinstance(int_literal, (int, str)), (
+            f"The value of an Int should be an int or str but got {type(int_literal)}"
+        )
+        return str(int_literal)
 
     if node_type == "Function":
         # Handle boolean functions like IsNull, IsNotNull
-        inputs = subtree["input"]
-        func_dict = subtree["function"]
+        func_tree = tree[node_type]
+        assert isinstance(func_tree, dict), f"A {node_type} should be a dict but got {type(func_tree)}"
+        inputs = func_tree["input"]
+        assert isinstance(inputs, list), f"A {node_type} should have a list of dicts as input but got {type(inputs)}"
+        input_tree = inputs[0]
+        assert isinstance(input_tree, dict), (
+            f"A {node_type} should have a list of dicts as input but got {type(input_tree)}"
+        )
+        func_dict = func_tree["function"]
+        assert isinstance(func_dict, dict), (
+            f"A {node_type} should have a function dict as input but got {type(func_dict)}"
+        )
 
         if "Boolean" in func_dict:
             func = func_dict["Boolean"]
@@ -118,91 +153,119 @@ def _pl_tree_to_sql(tree: dict) -> str:
                 return f"({arg_sql} IS NULL)"
             if func == "IsNotNull":
                 return f"({arg_sql} IS NOT NULL)"
-            raise NotImplementedError(f"Boolean function not supported: {func}")
+            msg = f"Boolean function not supported: {func}"
+            raise NotImplementedError(msg)
 
-        raise NotImplementedError(f"Unsupported function type: {func_dict}")
+        msg = f"Unsupported function type: {func_dict}"
+        raise NotImplementedError(msg)
 
     if node_type == "Scalar":
         # Detect format: old style (dtype/value) or new style (direct type key)
-        if "dtype" in subtree and "value" in subtree:
-            dtype = str(subtree["dtype"])
-            value = subtree["value"]
+        scalar_tree = tree[node_type]
+        assert isinstance(scalar_tree, dict), f"A {node_type} should be a dict but got {type(scalar_tree)}"
+        if "dtype" in scalar_tree and "value" in scalar_tree:
+            dtype = str(scalar_tree["dtype"])
+            value = scalar_tree["value"]
         else:
             # New style: dtype is the single key in the dict
-            dtype = next(iter(subtree.keys()))
-            value = subtree
+            dtype = next(iter(scalar_tree.keys()))
+            value = scalar_tree
+        assert isinstance(dtype, str), f"A {node_type} should have a str dtype but got {type(dtype)}"
+        assert isinstance(value, dict), f"A {node_type} should have a dict value but got {type(value)}"
 
         # Decimal support
         if dtype.startswith("{'Decimal'") or dtype == "Decimal":
-            decimal_value = value['Decimal']
-            decimal_value = Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[1])
-            return str(decimal_value)
+            decimal_value = value["Decimal"]
+            assert isinstance(decimal_value, list), (
+                f"A {dtype} should be a two or three member list but got {type(decimal_value)}"
+            )
+            assert 2 <= len(decimal_value) <= 3, (
+                f"A {dtype} should be a two or three member list but got {len(decimal_value)} member list"
+            )
+            return str(Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[-1]))
 
         # Datetime with microseconds since epoch
         if dtype.startswith("{'Datetime'") or dtype == "Datetime":
-            micros = value['Datetime'][0]
-            dt_timestamp = datetime.datetime.fromtimestamp(micros / 1_000_000, tz=datetime.UTC)
-            return f"'{str(dt_timestamp)}'::TIMESTAMP"
+            micros = value["Datetime"]
+            assert isinstance(micros, list), f"A {dtype} should be a one member list but got {type(micros)}"
+            dt_timestamp = datetime.datetime.fromtimestamp(micros[0] / 1_000_000, tz=datetime.timezone.utc)
+            return f"'{dt_timestamp!s}'::TIMESTAMP"
 
         # Match simple numeric/boolean types
-        if dtype in ("Int8", "Int16", "Int32", "Int64",
-                     "UInt8", "UInt16", "UInt32", "UInt64",
-                     "Float32", "Float64", "Boolean"):
+        if dtype in (
+            "Int8",
+            "Int16",
+            "Int32",
+            "Int64",
+            "UInt8",
+            "UInt16",
+            "UInt32",
+            "UInt64",
+            "Float32",
+            "Float64",
+            "Boolean",
+        ):
             return str(value[dtype])
 
         # Time type
         if dtype == "Time":
             nanoseconds = value["Time"]
+            assert isinstance(nanoseconds, int), f"A {dtype} should be an int but got {type(nanoseconds)}"
             seconds = nanoseconds // 1_000_000_000
             microseconds = (nanoseconds % 1_000_000_000) // 1_000
-            dt_time = (datetime.datetime.min + datetime.timedelta(
-                seconds=seconds, microseconds=microseconds
-            )).time()
+            dt_time = (datetime.datetime.min + datetime.timedelta(seconds=seconds, microseconds=microseconds)).time()
             return f"'{dt_time}'::TIME"
 
         # Date type
         if dtype == "Date":
             days_since_epoch = value["Date"]
+            assert isinstance(days_since_epoch, (float, int)), (
+                f"A {dtype} should be a number but got {type(days_since_epoch)}"
+            )
             date = datetime.date(1970, 1, 1) + datetime.timedelta(days=days_since_epoch)
             return f"'{date}'::DATE"
 
         # Binary type
         if dtype == "Binary":
-            binary_data = bytes(value["Binary"])
-            escaped = ''.join(f'\\x{b:02x}' for b in binary_data)
+            bin_value = value["Binary"]
+            assert isinstance(bin_value, list), f"A {dtype} should be a list but got {type(bin_value)}"
+            binary_data = bytes(bin_value)
+            escaped = "".join(f"\\x{b:02x}" for b in binary_data)
             return f"'{escaped}'::BLOB"
 
         # String type
         if dtype == "String" or dtype == "StringOwned":
             # Some new formats may store directly under StringOwned
-            string_val = value.get("StringOwned", value.get("String", None))
+            string_val: object | None = value.get("StringOwned", value.get("String", None))
             return f"'{string_val}'"
 
+        msg = f"Unsupported scalar type {dtype!s}, with value {value}"
+        raise NotImplementedError(msg)
 
-        raise NotImplementedError(f"Unsupported scalar type {str(dtype)}, with value {value}")
+    msg = f"Node type: {node_type} is not implemented. {tree[node_type]}"
+    raise NotImplementedError(msg)
 
-    raise NotImplementedError(f"Node type: {node_type} is not implemented. {subtree}")
 
 def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -> pl.LazyFrame:
-    """
-    A polars IO plugin for DuckDB.
-    """
+    """A polars IO plugin for DuckDB."""
+
     def source_generator(
-        with_columns: Optional[list[str]],
-        predicate: Optional[pl.Expr],
-        n_rows: Optional[int],
-        batch_size: Optional[int],
+        with_columns: list[str] | None,
+        predicate: pl.Expr | None,
+        n_rows: int | None,
+        batch_size: int | None,
     ) -> Iterator[pl.DataFrame]:
         duck_predicate = None
         relation_final = relation
         if with_columns is not None:
-            cols = ",".join(with_columns)
+            cols = ",".join(map(_escape_sql_identifier, with_columns))
             relation_final = relation_final.project(cols)
         if n_rows is not None:
             relation_final = relation_final.limit(n_rows)
         if predicate is not None:
             # We have a predicate, if possible, we push it down to DuckDB
-            duck_predicate = _predicate_to_expression(predicate)
+            with contextlib.suppress(AssertionError, KeyError):
+                duck_predicate = _predicate_to_expression(predicate)
             # Try to pushdown filter, if one exists
             if duck_predicate is not None:
                 relation_final = relation_final.filter(duck_predicate)
@@ -210,16 +273,12 @@ def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -> pl.LazyFrame:
             results = relation_final.fetch_arrow_reader()
         else:
             results = relation_final.fetch_arrow_reader(batch_size)
-        while True:
-            try:
-                record_batch = results.read_next_batch()
-                df = pl.from_arrow(record_batch)
-                if predicate is not None and duck_predicate is None:
-                    # We have a predicate, but did not manage to push it down, we fallback here
-                    yield pl.from_arrow(record_batch).filter(predicate)
-                else:
-                    yield pl.from_arrow(record_batch)
-            except StopIteration:
-                break
+
+        for record_batch in iter(results.read_next_batch, None):
+            if predicate is not None and duck_predicate is None:
+                # We have a predicate, but did not manage to push it down, we fallback here
+                yield pl.from_arrow(record_batch).filter(predicate)  # type: ignore[arg-type,misc,unused-ignore]
+            else:
+                yield pl.from_arrow(record_batch)  # type: ignore[misc,unused-ignore]
 
     return register_io_source(source_generator, schema=schema)
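Taken together, `duckdb_source` registers `source_generator` as a Polars IO plugin: column selections are projected through `_escape_sql_identifier`, and filters are pushed down via `_predicate_to_expression` whenever the serialized expression tree can be converted. A hedged end-to-end sketch; the hand-written `pl.Schema` and the sample query are illustrative assumptions, not part of this diff:

    import duckdb
    import polars as pl

    from duckdb.polars_io import duckdb_source

    con = duckdb.connect()
    rel = con.sql('SELECT range AS "needs quoting" FROM range(10)')

    # The caller supplies a Polars schema matching the relation's columns.
    lf = duckdb_source(rel, pl.Schema({"needs quoting": pl.Int64}))

    # Polars serializes the predicate; _predicate_to_expression converts it, and when that
    # succeeds DuckDB evaluates ("needs quoting" > 5) instead of Polars filtering afterwards.
    print(lf.filter(pl.col("needs quoting") > 5).collect())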