duckdb-1.4.1.dev125-cp313-cp313-win_amd64.whl → duckdb-1.5.0.dev94-cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of duckdb might be problematic; see the release details below for more information.

@@ -0,0 +1,231 @@
1
+ """DuckDB DB API 2.0 Type Objects Module.
2
+
3
+ This module provides DB API 2.0 compliant type objects for DuckDB, allowing applications
4
+ to check column types returned by queries against standard database API categories.
5
+
6
+ Example:
7
+ >>> import duckdb
8
+ >>>
9
+ >>> conn = duckdb.connect()
10
+ >>> cursor = conn.cursor()
11
+ >>> cursor.execute("SELECT 'hello' as text_col, 42 as num_col, CURRENT_DATE as date_col")
12
+ >>>
13
+ >>> # Check column types using DB API type objects
14
+ >>> for i, desc in enumerate(cursor.description):
15
+ >>> col_name, col_type = desc[0], desc[1]
16
+ >>> if col_type == duckdb.STRING:
17
+ >>> print(f"{col_name} is a string type")
18
+ >>> elif col_type == duckdb.NUMBER:
19
+ >>> print(f"{col_name} is a numeric type")
20
+ >>> elif col_type == duckdb.DATETIME:
21
+ >>> print(f"{col_name} is a date/time type")
22
+
23
+ See Also:
24
+ - PEP 249: https://peps.python.org/pep-0249/
25
+ - DuckDB Type System: https://duckdb.org/docs/sql/data_types/overview
26
+ """
27
+
28
+ from duckdb import sqltypes
29
+
30
+
31
class DBAPITypeObject:
    """DB API 2.0 type object used to categorize DuckDB column types.

    Implements the comparison-based type objects described in PEP 249
    (DB API 2.0): a DuckDBPyType compares equal to this object when it
    belongs to the broader category the object represents, such as
    STRING, NUMBER, or DATETIME.

    Args:
        types: DuckDBPyType instances that make up this type category.

    Example:
        >>> string_types = DBAPITypeObject([sqltypes.VARCHAR, sqltypes.CHAR])
        >>> sqltypes.VARCHAR == string_types  # True
        >>> sqltypes.INTEGER == string_types  # False

    Note:
        Per the DB API 2.0 specification, category membership is tested
        with the equality operator rather than with isinstance() checks.
    """

    def __init__(self, types: list[sqltypes.DuckDBPyType]) -> None:
        """Store the DuckDB types that belong to this category.

        Args:
            types: List of DuckDB types that belong to this category.
        """
        self.types = types

    def __eq__(self, other: object) -> bool:
        """Check whether a DuckDB type belongs to this type category.

        This is the DB API 2.0 type-checking mechanism: comparing a
        DuckDBPyType to this object tests membership in the category.

        Args:
            other: The object to compare, typically a DuckDBPyType instance.

        Returns:
            True if other is a DuckDBPyType contained in this category,
            False otherwise.

        Example:
            >>> NUMBER == sqltypes.INTEGER  # True
            >>> NUMBER == sqltypes.VARCHAR  # False
        """
        # Anything that is not a DuckDBPyType can never be in a category.
        if not isinstance(other, sqltypes.DuckDBPyType):
            return False
        return other in self.types

    def __repr__(self) -> str:
        """Return a string representation of this type object.

        Returns:
            A string listing the DuckDB types contained in this category.

        Example:
            >>> repr(STRING)
            '<DBAPITypeObject [VARCHAR]>'
        """
        joined = ",".join(map(str, self.types))
        return f"<DBAPITypeObject [{joined}]>"
94
+
95
+
96
# Standard DB API 2.0 (PEP 249) type objects exposed by this module.

STRING = DBAPITypeObject([sqltypes.VARCHAR])
"""STRING type object for text-based database columns.

Covers DuckDB's string types (currently VARCHAR, variable-length
character strings). Compare a column's type against this object to
decide whether its values should be handled as Python strings.

DB API 2.0 Reference:
    https://peps.python.org/pep-0249/#string

Example:
    >>> cursor.description[0][1] == STRING  # Check if first column is text
"""

NUMBER = DBAPITypeObject(
    [
        sqltypes.TINYINT,
        sqltypes.UTINYINT,
        sqltypes.SMALLINT,
        sqltypes.USMALLINT,
        sqltypes.INTEGER,
        sqltypes.UINTEGER,
        sqltypes.BIGINT,
        sqltypes.UBIGINT,
        sqltypes.HUGEINT,
        sqltypes.UHUGEINT,
        sqltypes.DuckDBPyType("BIGNUM"),
        sqltypes.DuckDBPyType("DECIMAL"),
        sqltypes.FLOAT,
        sqltypes.DOUBLE,
    ]
)
"""NUMBER type object for numeric database columns.

Covers every numeric DuckDB type:

Integer Types:
    - TINYINT, UTINYINT: 8-bit signed/unsigned integers
    - SMALLINT, USMALLINT: 16-bit signed/unsigned integers
    - INTEGER, UINTEGER: 32-bit signed/unsigned integers
    - BIGINT, UBIGINT: 64-bit signed/unsigned integers
    - HUGEINT, UHUGEINT: 128-bit signed/unsigned integers

Decimal Types:
    - BIGNUM: Arbitrary precision integers
    - DECIMAL: Fixed-point decimal numbers

Floating Point Types:
    - FLOAT: 32-bit floating point
    - DOUBLE: 64-bit floating point

Compare a column's type against this object to decide whether its values
should be handled as Python int, float, or Decimal objects.

DB API 2.0 Reference:
    https://peps.python.org/pep-0249/#number

Example:
    >>> cursor.description[1][1] == NUMBER  # Check if second column is numeric
"""

DATETIME = DBAPITypeObject(
    [
        sqltypes.DATE,
        sqltypes.TIME,
        sqltypes.TIME_TZ,
        sqltypes.TIMESTAMP,
        sqltypes.TIMESTAMP_TZ,
        sqltypes.TIMESTAMP_NS,
        sqltypes.TIMESTAMP_MS,
        sqltypes.TIMESTAMP_S,
    ]
)
"""DATETIME type object for date and time database columns.

Covers the temporal DuckDB types:

Date Types:
    - DATE: Calendar dates (year, month, day)

Time Types:
    - TIME: Time of day without timezone
    - TIME_TZ: Time of day with timezone

Timestamp Types:
    - TIMESTAMP: Date and time without timezone (microsecond precision)
    - TIMESTAMP_TZ: Date and time with timezone
    - TIMESTAMP_NS: Nanosecond precision timestamps
    - TIMESTAMP_MS: Millisecond precision timestamps
    - TIMESTAMP_S: Second precision timestamps

Compare a column's type against this object to decide whether its values
should be handled as Python datetime, date, or time objects.

DB API 2.0 Reference:
    https://peps.python.org/pep-0249/#datetime

Example:
    >>> cursor.description[2][1] == DATETIME  # Check if third column is date/time
"""

BINARY = DBAPITypeObject([sqltypes.BLOB])
"""BINARY type object for binary data database columns.

Covers BLOB (Binary Large Object), DuckDB's type for arbitrary binary
data. Compare a column's type against this object to decide whether its
values should be handled as Python bytes objects.

DB API 2.0 Reference:
    https://peps.python.org/pep-0249/#binary

Example:
    >>> cursor.description[3][1] == BINARY  # Check if fourth column is binary
"""

ROWID = None
"""ROWID type object for row identifier columns.

Always None for DuckDB connections: applications should not rely on
ROWID functionality when using DuckDB.

DB API 2.0 Reference:
    https://peps.python.org/pep-0249/#rowid
"""
duckdb/_version.py ADDED
@@ -0,0 +1,22 @@
1
+ # ----------------------------------------------------------------------
2
+ # Version API
3
+ #
4
+ # We provide three symbols:
5
+ # - duckdb.__version__: The version of this package
6
+ # - duckdb.__duckdb_version__: The version of duckdb that is bundled
7
+ # - duckdb.version(): A human-readable version string containing both of the above
8
+ # ----------------------------------------------------------------------
9
+ from importlib.metadata import version as _dist_version
10
+
11
+ import _duckdb
12
+
13
__version__: str = _dist_version("duckdb")
"""Version of the DuckDB Python Package."""

__duckdb_version__: str = _duckdb.__version__
"""Version of DuckDB that is bundled."""


def version() -> str:
    """Human-friendly formatted version string of both the distribution package and the bundled DuckDB engine.

    Returns:
        A string of the form ``"<package version> (with duckdb <engine version>)"``.
    """
    # Use the exported __duckdb_version__ symbol rather than reaching back
    # into _duckdb, so all three public version symbols stay consistent.
    return f"{__version__} (with duckdb {__duckdb_version__})"
@@ -1,7 +1,5 @@
1
- from io import StringIO, TextIOBase # noqa: D100
2
- from typing import Any, Union
1
+ """StringIO buffer wrapper.
3
2
 
4
- """
5
3
  BSD 3-Clause License
6
4
 
7
5
  Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
@@ -35,10 +33,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35
33
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36
34
  """
37
35
 
36
+ from io import StringIO, TextIOBase
37
+ from typing import Any, Union
38
+
39
+
40
+ class BytesIOWrapper:
41
+ """Wrapper that wraps a StringIO buffer and reads bytes from it.
42
+
43
+ Created for compat with pyarrow read_csv.
44
+ """
38
45
 
39
- class BytesIOWrapper: # noqa: D101
40
- # Wrapper that wraps a StringIO buffer and reads bytes from it
41
- # Created for compat with pyarrow read_csv
42
46
  def __init__(self, buffer: Union[StringIO, TextIOBase], encoding: str = "utf-8") -> None: # noqa: D107
43
47
  self.buffer = buffer
44
48
  self.encoding = encoding
@@ -8,7 +8,7 @@ if TYPE_CHECKING:
8
8
  from ._typing import DateTimeLiteral, DecimalLiteral, LiteralType
9
9
 
10
10
  from duckdb import ColumnExpression, ConstantExpression, Expression, FunctionExpression
11
- from duckdb.typing import DuckDBPyType
11
+ from duckdb.sqltypes import DuckDBPyType
12
12
 
13
13
  __all__ = ["Column"]
14
14
 
@@ -1,6 +1,6 @@
1
1
  from typing import cast # noqa: D100
2
2
 
3
- from duckdb.typing import DuckDBPyType
3
+ from duckdb.sqltypes import DuckDBPyType
4
4
 
5
5
  from .types import (
6
6
  ArrayType,
@@ -22,7 +22,7 @@ from typing import (
22
22
  )
23
23
 
24
24
  import duckdb
25
- from duckdb.typing import DuckDBPyType
25
+ from duckdb.sqltypes import DuckDBPyType
26
26
 
27
27
  from ..exception import ContributionsAcceptedError
28
28
 
duckdb/filesystem.py CHANGED
@@ -1,5 +1,12 @@
1
- from io import TextIOBase # noqa: D100
2
- from typing import IO
1
+ """In-memory filesystem to store ephemeral dependencies.
2
+
3
+ Warning: Not for external use. May change at any moment. Likely to be made internal.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import io
9
+ import typing
3
10
 
4
11
  from fsspec import AbstractFileSystem
5
12
  from fsspec.implementations.memory import MemoryFile, MemoryFileSystem
@@ -7,22 +14,20 @@ from fsspec.implementations.memory import MemoryFile, MemoryFileSystem
7
14
  from .bytes_io_wrapper import BytesIOWrapper
8
15
 
9
16
 
10
- def is_file_like(obj) -> bool: # noqa: D103, ANN001
11
- # We only care that we can read from the file
12
- return hasattr(obj, "read") and hasattr(obj, "seek")
17
+ class ModifiedMemoryFileSystem(MemoryFileSystem):
18
+ """In-memory filesystem implementation that uses its own protocol."""
13
19
 
14
-
15
- class ModifiedMemoryFileSystem(MemoryFileSystem): # noqa: D101
16
20
  protocol = ("DUCKDB_INTERNAL_OBJECTSTORE",)
17
21
  # defer to the original implementation that doesn't hardcode the protocol
18
- _strip_protocol = classmethod(AbstractFileSystem._strip_protocol.__func__)
22
+ _strip_protocol: typing.Callable[[str], str] = classmethod(AbstractFileSystem._strip_protocol.__func__) # type: ignore[assignment]
19
23
 
20
- def add_file(self, object: IO, path: str) -> None: # noqa: D102
21
- if not is_file_like(object):
24
+ def add_file(self, obj: io.IOBase | BytesIOWrapper | object, path: str) -> None:
25
+ """Add a file to the filesystem."""
26
+ if not (hasattr(obj, "read") and hasattr(obj, "seek")):
22
27
  msg = "Can not read from a non file-like object"
23
- raise ValueError(msg)
24
- path = self._strip_protocol(path)
25
- if isinstance(object, TextIOBase):
28
+ raise TypeError(msg)
29
+ if isinstance(obj, io.TextIOBase):
26
30
  # Wrap this so that we can return a bytes object from 'read'
27
- object = BytesIOWrapper(object)
28
- self.store[path] = MemoryFile(self, path, object.read())
31
+ obj = BytesIOWrapper(obj)
32
+ path = self._strip_protocol(path)
33
+ self.store[path] = MemoryFile(self, path, obj.read())
@@ -0,0 +1,3 @@
1
+ from _duckdb._func import ARROW, DEFAULT, NATIVE, SPECIAL, FunctionNullHandling, PythonUDFType # noqa: D104
2
+
3
+ __all__ = ["ARROW", "DEFAULT", "NATIVE", "SPECIAL", "FunctionNullHandling", "PythonUDFType"]
@@ -1,3 +1,13 @@
1
- from _duckdb.functional import ARROW, DEFAULT, NATIVE, SPECIAL, FunctionNullHandling, PythonUDFType # noqa: D104
1
+ """DuckDB function constants and types. DEPRECATED: please use `duckdb.func` instead."""
2
+
3
+ import warnings
4
+
5
+ from duckdb.func import ARROW, DEFAULT, NATIVE, SPECIAL, FunctionNullHandling, PythonUDFType
2
6
 
3
7
  __all__ = ["ARROW", "DEFAULT", "NATIVE", "SPECIAL", "FunctionNullHandling", "PythonUDFType"]
8
+
9
+ warnings.warn(
10
+ "`duckdb.functional` is deprecated and will be removed in a future version. Please use `duckdb.func` instead.",
11
+ DeprecationWarning,
12
+ stacklevel=2,
13
+ )
duckdb/polars_io.py CHANGED
@@ -1,17 +1,25 @@
1
- import datetime # noqa: D100
1
+ from __future__ import annotations # noqa: D100
2
+
3
+ import contextlib
4
+ import datetime
2
5
  import json
3
- from collections.abc import Iterator
6
+ import typing
4
7
  from decimal import Decimal
5
- from typing import Optional
6
8
 
7
9
  import polars as pl
8
10
  from polars.io.plugins import register_io_source
9
11
 
10
12
  import duckdb
11
- from duckdb import SQLExpression
12
13
 
14
+ if typing.TYPE_CHECKING:
15
+ from collections.abc import Iterator
16
+
17
+ import typing_extensions
18
+
19
+ _ExpressionTree: typing_extensions.TypeAlias = typing.Dict[str, typing.Union[str, int, "_ExpressionTree", typing.Any]] # noqa: UP006
13
20
 
14
- def _predicate_to_expression(predicate: pl.Expr) -> Optional[SQLExpression]:
21
+
22
+ def _predicate_to_expression(predicate: pl.Expr) -> duckdb.Expression | None:
15
23
  """Convert a Polars predicate expression to a DuckDB-compatible SQL expression.
16
24
 
17
25
  Parameters:
@@ -31,7 +39,7 @@ def _predicate_to_expression(predicate: pl.Expr) -> Optional[SQLExpression]:
31
39
  try:
32
40
  # Convert the tree to SQL
33
41
  sql_filter = _pl_tree_to_sql(tree)
34
- return SQLExpression(sql_filter)
42
+ return duckdb.SQLExpression(sql_filter)
35
43
  except Exception:
36
44
  # If the conversion fails, we return None
37
45
  return None
@@ -70,7 +78,7 @@ def _escape_sql_identifier(identifier: str) -> str:
70
78
  return f'"{escaped}"'
71
79
 
72
80
 
73
- def _pl_tree_to_sql(tree: dict) -> str:
81
+ def _pl_tree_to_sql(tree: _ExpressionTree) -> str:
74
82
  """Recursively convert a Polars expression tree (as JSON) to a SQL string.
75
83
 
76
84
  Parameters:
@@ -91,38 +99,51 @@ def _pl_tree_to_sql(tree: dict) -> str:
91
99
  Output: "(foo > 5)"
92
100
  """
93
101
  [node_type] = tree.keys()
94
- subtree = tree[node_type]
95
102
 
96
103
  if node_type == "BinaryExpr":
97
104
  # Binary expressions: left OP right
98
- return (
99
- "("
100
- + " ".join(
101
- (
102
- _pl_tree_to_sql(subtree["left"]),
103
- _pl_operation_to_sql(subtree["op"]),
104
- _pl_tree_to_sql(subtree["right"]),
105
- )
106
- )
107
- + ")"
108
- )
105
+ bin_expr_tree = tree[node_type]
106
+ assert isinstance(bin_expr_tree, dict), f"A {node_type} should be a dict but got {type(bin_expr_tree)}"
107
+ lhs, op, rhs = bin_expr_tree["left"], bin_expr_tree["op"], bin_expr_tree["right"]
108
+ assert isinstance(lhs, dict), f"LHS of a {node_type} should be a dict but got {type(lhs)}"
109
+ assert isinstance(op, str), f"The op of a {node_type} should be a str but got {type(op)}"
110
+ assert isinstance(rhs, dict), f"RHS of a {node_type} should be a dict but got {type(rhs)}"
111
+ return f"({_pl_tree_to_sql(lhs)} {_pl_operation_to_sql(op)} {_pl_tree_to_sql(rhs)})"
109
112
  if node_type == "Column":
110
113
  # A reference to a column name
111
114
  # Wrap in quotes to handle special characters
112
- return _escape_sql_identifier(subtree)
115
+ col_name = tree[node_type]
116
+ assert isinstance(col_name, str), f"The col name of a {node_type} should be a str but got {type(col_name)}"
117
+ return _escape_sql_identifier(col_name)
113
118
 
114
119
  if node_type in ("Literal", "Dyn"):
115
120
  # Recursively process dynamic or literal values
116
- return _pl_tree_to_sql(subtree)
121
+ val_tree = tree[node_type]
122
+ assert isinstance(val_tree, dict), f"A {node_type} should be a dict but got {type(val_tree)}"
123
+ return _pl_tree_to_sql(val_tree)
117
124
 
118
125
  if node_type == "Int":
119
126
  # Direct integer literals
120
- return str(subtree)
127
+ int_literal = tree[node_type]
128
+ assert isinstance(int_literal, (int, str)), (
129
+ f"The value of an Int should be an int or str but got {type(int_literal)}"
130
+ )
131
+ return str(int_literal)
121
132
 
122
133
  if node_type == "Function":
123
134
  # Handle boolean functions like IsNull, IsNotNull
124
- inputs = subtree["input"]
125
- func_dict = subtree["function"]
135
+ func_tree = tree[node_type]
136
+ assert isinstance(func_tree, dict), f"A {node_type} should be a dict but got {type(func_tree)}"
137
+ inputs = func_tree["input"]
138
+ assert isinstance(inputs, list), f"A {node_type} should have a list of dicts as input but got {type(inputs)}"
139
+ input_tree = inputs[0]
140
+ assert isinstance(input_tree, dict), (
141
+ f"A {node_type} should have a list of dicts as input but got {type(input_tree)}"
142
+ )
143
+ func_dict = func_tree["function"]
144
+ assert isinstance(func_dict, dict), (
145
+ f"A {node_type} should have a function dict as input but got {type(func_dict)}"
146
+ )
126
147
 
127
148
  if "Boolean" in func_dict:
128
149
  func = func_dict["Boolean"]
@@ -140,24 +161,34 @@ def _pl_tree_to_sql(tree: dict) -> str:
140
161
 
141
162
  if node_type == "Scalar":
142
163
  # Detect format: old style (dtype/value) or new style (direct type key)
143
- if "dtype" in subtree and "value" in subtree:
144
- dtype = str(subtree["dtype"])
145
- value = subtree["value"]
164
+ scalar_tree = tree[node_type]
165
+ assert isinstance(scalar_tree, dict), f"A {node_type} should be a dict but got {type(scalar_tree)}"
166
+ if "dtype" in scalar_tree and "value" in scalar_tree:
167
+ dtype = str(scalar_tree["dtype"])
168
+ value = scalar_tree["value"]
146
169
  else:
147
170
  # New style: dtype is the single key in the dict
148
- dtype = next(iter(subtree.keys()))
149
- value = subtree
171
+ dtype = next(iter(scalar_tree.keys()))
172
+ value = scalar_tree
173
+ assert isinstance(dtype, str), f"A {node_type} should have a str dtype but got {type(dtype)}"
174
+ assert isinstance(value, dict), f"A {node_type} should have a dict value but got {type(value)}"
150
175
 
151
176
  # Decimal support
152
177
  if dtype.startswith("{'Decimal'") or dtype == "Decimal":
153
178
  decimal_value = value["Decimal"]
154
- decimal_value = Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[1])
155
- return str(decimal_value)
179
+ assert isinstance(decimal_value, list), (
180
+ f"A {dtype} should be a two or three member list but got {type(decimal_value)}"
181
+ )
182
+ assert 2 <= len(decimal_value) <= 3, (
183
+ f"A {dtype} should be a two or three member list but got {len(decimal_value)} member list"
184
+ )
185
+ return str(Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[-1]))
156
186
 
157
187
  # Datetime with microseconds since epoch
158
188
  if dtype.startswith("{'Datetime'") or dtype == "Datetime":
159
- micros = value["Datetime"][0]
160
- dt_timestamp = datetime.datetime.fromtimestamp(micros / 1_000_000, tz=datetime.UTC)
189
+ micros = value["Datetime"]
190
+ assert isinstance(micros, list), f"A {dtype} should be a one member list but got {type(micros)}"
191
+ dt_timestamp = datetime.datetime.fromtimestamp(micros[0] / 1_000_000, tz=datetime.timezone.utc)
161
192
  return f"'{dt_timestamp!s}'::TIMESTAMP"
162
193
 
163
194
  # Match simple numeric/boolean types
@@ -179,6 +210,7 @@ def _pl_tree_to_sql(tree: dict) -> str:
179
210
  # Time type
180
211
  if dtype == "Time":
181
212
  nanoseconds = value["Time"]
213
+ assert isinstance(nanoseconds, int), f"A {dtype} should be an int but got {type(nanoseconds)}"
182
214
  seconds = nanoseconds // 1_000_000_000
183
215
  microseconds = (nanoseconds % 1_000_000_000) // 1_000
184
216
  dt_time = (datetime.datetime.min + datetime.timedelta(seconds=seconds, microseconds=microseconds)).time()
@@ -187,25 +219,30 @@ def _pl_tree_to_sql(tree: dict) -> str:
187
219
  # Date type
188
220
  if dtype == "Date":
189
221
  days_since_epoch = value["Date"]
222
+ assert isinstance(days_since_epoch, (float, int)), (
223
+ f"A {dtype} should be a number but got {type(days_since_epoch)}"
224
+ )
190
225
  date = datetime.date(1970, 1, 1) + datetime.timedelta(days=days_since_epoch)
191
226
  return f"'{date}'::DATE"
192
227
 
193
228
  # Binary type
194
229
  if dtype == "Binary":
195
- binary_data = bytes(value["Binary"])
230
+ bin_value = value["Binary"]
231
+ assert isinstance(bin_value, list), f"A {dtype} should be a list but got {type(bin_value)}"
232
+ binary_data = bytes(bin_value)
196
233
  escaped = "".join(f"\\x{b:02x}" for b in binary_data)
197
234
  return f"'{escaped}'::BLOB"
198
235
 
199
236
  # String type
200
237
  if dtype == "String" or dtype == "StringOwned":
201
238
  # Some new formats may store directly under StringOwned
202
- string_val = value.get("StringOwned", value.get("String", None))
239
+ string_val: object | None = value.get("StringOwned", value.get("String", None))
203
240
  return f"'{string_val}'"
204
241
 
205
242
  msg = f"Unsupported scalar type {dtype!s}, with value {value}"
206
243
  raise NotImplementedError(msg)
207
244
 
208
- msg = f"Node type: {node_type} is not implemented. {subtree}"
245
+ msg = f"Node type: {node_type} is not implemented. {tree[node_type]}"
209
246
  raise NotImplementedError(msg)
210
247
 
211
248
 
@@ -213,10 +250,10 @@ def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -
213
250
  """A polars IO plugin for DuckDB."""
214
251
 
215
252
  def source_generator(
216
- with_columns: Optional[list[str]],
217
- predicate: Optional[pl.Expr],
218
- n_rows: Optional[int],
219
- batch_size: Optional[int],
253
+ with_columns: list[str] | None,
254
+ predicate: pl.Expr | None,
255
+ n_rows: int | None,
256
+ batch_size: int | None,
220
257
  ) -> Iterator[pl.DataFrame]:
221
258
  duck_predicate = None
222
259
  relation_final = relation
@@ -227,7 +264,8 @@ def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -
227
264
  relation_final = relation_final.limit(n_rows)
228
265
  if predicate is not None:
229
266
  # We have a predicate, if possible, we push it down to DuckDB
230
- duck_predicate = _predicate_to_expression(predicate)
267
+ with contextlib.suppress(AssertionError, KeyError):
268
+ duck_predicate = _predicate_to_expression(predicate)
231
269
  # Try to pushdown filter, if one exists
232
270
  if duck_predicate is not None:
233
271
  relation_final = relation_final.filter(duck_predicate)
@@ -239,8 +277,8 @@ def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -
239
277
  for record_batch in iter(results.read_next_batch, None):
240
278
  if predicate is not None and duck_predicate is None:
241
279
  # We have a predicate, but did not manage to push it down, we fallback here
242
- yield pl.from_arrow(record_batch).filter(predicate)
280
+ yield pl.from_arrow(record_batch).filter(predicate) # type: ignore[arg-type,misc,unused-ignore]
243
281
  else:
244
- yield pl.from_arrow(record_batch)
282
+ yield pl.from_arrow(record_batch) # type: ignore[misc,unused-ignore]
245
283
 
246
284
  return register_io_source(source_generator, schema=schema)