duckdb-1.4.0.dev2849-cp312-cp312-win_amd64.whl → duckdb-1.4.2.dev26-cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of duckdb might be problematic; see the package's listing on its public registry for more details.
- _duckdb-stubs/__init__.pyi +1443 -0
- _duckdb-stubs/_func.pyi +46 -0
- _duckdb-stubs/_sqltypes.pyi +75 -0
- duckdb/duckdb.cp312-win_amd64.pyd → _duckdb.cp312-win_amd64.pyd +0 -0
- adbc_driver_duckdb/__init__.py +11 -8
- adbc_driver_duckdb/dbapi.py +2 -3
- duckdb/__init__.py +343 -388
- duckdb/_dbapi_type_object.py +231 -0
- duckdb/_version.py +22 -0
- duckdb/bytes_io_wrapper.py +12 -9
- duckdb/experimental/__init__.py +2 -1
- duckdb/experimental/spark/__init__.py +3 -4
- duckdb/experimental/spark/_globals.py +8 -8
- duckdb/experimental/spark/_typing.py +7 -9
- duckdb/experimental/spark/conf.py +16 -15
- duckdb/experimental/spark/context.py +60 -44
- duckdb/experimental/spark/errors/__init__.py +33 -35
- duckdb/experimental/spark/errors/error_classes.py +1 -1
- duckdb/experimental/spark/errors/exceptions/__init__.py +1 -1
- duckdb/experimental/spark/errors/exceptions/base.py +39 -88
- duckdb/experimental/spark/errors/utils.py +11 -16
- duckdb/experimental/spark/exception.py +9 -6
- duckdb/experimental/spark/sql/__init__.py +5 -5
- duckdb/experimental/spark/sql/_typing.py +8 -15
- duckdb/experimental/spark/sql/catalog.py +21 -20
- duckdb/experimental/spark/sql/column.py +48 -55
- duckdb/experimental/spark/sql/conf.py +9 -8
- duckdb/experimental/spark/sql/dataframe.py +213 -231
- duckdb/experimental/spark/sql/functions.py +1317 -1220
- duckdb/experimental/spark/sql/group.py +56 -52
- duckdb/experimental/spark/sql/readwriter.py +80 -94
- duckdb/experimental/spark/sql/session.py +64 -59
- duckdb/experimental/spark/sql/streaming.py +9 -10
- duckdb/experimental/spark/sql/type_utils.py +67 -65
- duckdb/experimental/spark/sql/types.py +309 -345
- duckdb/experimental/spark/sql/udf.py +6 -6
- duckdb/filesystem.py +26 -16
- duckdb/func/__init__.py +3 -0
- duckdb/functional/__init__.py +12 -16
- duckdb/polars_io.py +141 -82
- duckdb/query_graph/__main__.py +91 -96
- duckdb/sqltypes/__init__.py +63 -0
- duckdb/typing/__init__.py +18 -8
- duckdb/udf.py +10 -5
- duckdb/value/__init__.py +1 -0
- duckdb/value/{constant.py → constant/__init__.py} +62 -60
- duckdb-1.4.2.dev26.dist-info/METADATA +88 -0
- duckdb-1.4.2.dev26.dist-info/RECORD +52 -0
- {duckdb-1.4.0.dev2849.dist-info → duckdb-1.4.2.dev26.dist-info}/WHEEL +1 -1
- duckdb-1.4.2.dev26.dist-info/licenses/LICENSE +7 -0
- duckdb-1.4.0.dev2849.dist-info/METADATA +0 -47
- duckdb-1.4.0.dev2849.dist-info/RECORD +0 -48
- duckdb-1.4.0.dev2849.dist-info/top_level.txt +0 -3
- duckdb-stubs/__init__.pyi +0 -712
- duckdb-stubs/functional/__init__.pyi +0 -33
- duckdb-stubs/typing/__init__.pyi +0 -37
- duckdb-stubs/value/constant/__init__.pyi +0 -116
- /duckdb-stubs/value/__init__.pyi → /duckdb/py.typed +0 -0
duckdb/experimental/spark/sql/udf.py
CHANGED
@@ -1,4 +1,4 @@
-# https://sparkbyexamples.com/pyspark/pyspark-udf-user-defined-function/
+# https://sparkbyexamples.com/pyspark/pyspark-udf-user-defined-function/  # noqa: D100
 from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union

 from .types import DataType
@@ -10,11 +10,11 @@ DataTypeOrString = Union[DataType, str]
 UserDefinedFunctionLike = TypeVar("UserDefinedFunctionLike")


-class UDFRegistration:
-    def __init__(self, sparkSession: "SparkSession"):
+class UDFRegistration:  # noqa: D101
+    def __init__(self, sparkSession: "SparkSession") -> None:  # noqa: D107
         self.sparkSession = sparkSession

-    def register(
+    def register(  # noqa: D102
         self,
         name: str,
         f: Union[Callable[..., Any], "UserDefinedFunctionLike"],
@@ -22,7 +22,7 @@ class UDFRegistration:
     ) -> "UserDefinedFunctionLike":
         self.sparkSession.conn.create_function(name, f, return_type=returnType)

-    def registerJavaFunction(
+    def registerJavaFunction(  # noqa: D102
         self,
         name: str,
         javaClassName: str,
@@ -30,7 +30,7 @@ class UDFRegistration:
     ) -> None:
         raise NotImplementedError

-    def registerJavaUDAF(self, name: str, javaClassName: str) -> None:
+    def registerJavaUDAF(self, name: str, javaClassName: str) -> None:  # noqa: D102
         raise NotImplementedError

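UDFRegistration.register above is a thin Spark-compatibility shim over DuckDB's native Python UDF API: it forwards to create_function on the wrapped connection. A minimal sketch of that underlying call, assuming the new wheel is installed; the function name and values are illustrative, not taken from the diff:

import duckdb

con = duckdb.connect()

def plus_one(x: int) -> int:
    return x + 1

# Roughly what register() does via sparkSession.conn.create_function(name, f, return_type=returnType);
# with type annotations present, DuckDB can infer the parameter and return types.
con.create_function("plus_one", plus_one)
print(con.sql("SELECT plus_one(41)").fetchall())  # expected: [(42,)]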
duckdb/filesystem.py
CHANGED
@@ -1,23 +1,33 @@
-
-
-
-
+"""In-memory filesystem to store ephemeral dependencies.
+
+Warning: Not for external use. May change at any moment. Likely to be made internal.
+"""
+
+from __future__ import annotations

-
-
-
+import io
+import typing
+
+from fsspec import AbstractFileSystem
+from fsspec.implementations.memory import MemoryFile, MemoryFileSystem
+
+from .bytes_io_wrapper import BytesIOWrapper


 class ModifiedMemoryFileSystem(MemoryFileSystem):
-
+    """In-memory filesystem implementation that uses its own protocol."""
+
+    protocol = ("DUCKDB_INTERNAL_OBJECTSTORE",)
     # defer to the original implementation that doesn't hardcode the protocol
-    _strip_protocol = classmethod(AbstractFileSystem._strip_protocol.__func__)
+    _strip_protocol: typing.Callable[[str], str] = classmethod(AbstractFileSystem._strip_protocol.__func__)  # type: ignore[assignment]

-    def add_file(self, object, path):
-
-
-
-
+    def add_file(self, obj: io.IOBase | BytesIOWrapper | object, path: str) -> None:
+        """Add a file to the filesystem."""
+        if not (hasattr(obj, "read") and hasattr(obj, "seek")):
+            msg = "Can not read from a non file-like object"
+            raise TypeError(msg)
+        if isinstance(obj, io.TextIOBase):
             # Wrap this so that we can return a bytes object from 'read'
-
-
+            obj = BytesIOWrapper(obj)
+        path = self._strip_protocol(path)
+        self.store[path] = MemoryFile(self, path, obj.read())
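The rewritten add_file now rejects non file-like objects and wraps text streams so the stored MemoryFile holds bytes. A short sketch of that behaviour, assuming the module is imported as duckdb.filesystem (its own docstring marks it as internal and subject to change):

import io

from duckdb.filesystem import ModifiedMemoryFileSystem

fs = ModifiedMemoryFileSystem()

# A text stream passes the read/seek check and is wrapped in BytesIOWrapper,
# so the MemoryFile stored under the stripped path contains bytes.
fs.add_file(io.StringIO("a,b\n1,2\n"), "example.csv")

# Anything without read/seek is rejected up front.
try:
    fs.add_file(object(), "broken.csv")
except TypeError as exc:
    print(exc)  # Can not read from a non file-like object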
duckdb/func/__init__.py
ADDED
duckdb/functional/__init__.py
CHANGED
@@ -1,17 +1,13 @@
-
-
-
-
-
-    NATIVE,
-    ARROW
-)
+"""DuckDB function constants and types. DEPRECATED: please use `duckdb.func` instead."""
+
+import warnings
+
+from duckdb.func import ARROW, DEFAULT, NATIVE, SPECIAL, FunctionNullHandling, PythonUDFType

-__all__ = [
-
-
-
-
-
-
-]
+__all__ = ["ARROW", "DEFAULT", "NATIVE", "SPECIAL", "FunctionNullHandling", "PythonUDFType"]
+
+warnings.warn(
+    "`duckdb.functional` is deprecated and will be removed in a future version. Please use `duckdb.func` instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
duckdb/polars_io.py
CHANGED
@@ -1,20 +1,30 @@
-import
-import polars as pl
-from typing import Iterator, Optional
+from __future__ import annotations  # noqa: D100

-
-
+import contextlib
+import datetime
 import json
+import typing
 from decimal import Decimal
-import datetime

-
-
-
-
+import polars as pl
+from polars.io.plugins import register_io_source
+
+import duckdb
+
+if typing.TYPE_CHECKING:
+    from collections.abc import Iterator
+
+    import typing_extensions
+
+_ExpressionTree: typing_extensions.TypeAlias = typing.Dict[str, typing.Union[str, int, "_ExpressionTree", typing.Any]]  # noqa: UP006
+
+
+def _predicate_to_expression(predicate: pl.Expr) -> duckdb.Expression | None:
+    """Convert a Polars predicate expression to a DuckDB-compatible SQL expression.
+
     Parameters:
         predicate (pl.Expr): A Polars expression (e.g., col("foo") > 5)
-
+
     Returns:
         SQLExpression: A DuckDB SQL expression string equivalent.
         None: If conversion fails.
@@ -25,20 +35,19 @@ def _predicate_to_expression(predicate: pl.Expr) -> Optional[SQLExpression]:
     """
     # Serialize the Polars expression tree to JSON
     tree = json.loads(predicate.meta.serialize(format="json"))
-
+
     try:
         # Convert the tree to SQL
         sql_filter = _pl_tree_to_sql(tree)
-        return SQLExpression(sql_filter)
-    except:
+        return duckdb.SQLExpression(sql_filter)
+    except Exception:
         # If the conversion fails, we return None
         return None


 def _pl_operation_to_sql(op: str) -> str:
-    """
-
-
+    """Map Polars binary operation strings to SQL equivalents.
+
     Example:
         >>> _pl_operation_to_sql("Eq")
         '='
@@ -55,19 +64,29 @@ def _pl_operation_to_sql(op: str) -> str:
             "Or": "OR",
         }[op]
     except KeyError:
-        raise NotImplementedError(op)
+        raise NotImplementedError(op)  # noqa: B904
+

+def _escape_sql_identifier(identifier: str) -> str:
+    """Escape SQL identifiers by doubling any double quotes and wrapping in double quotes.

-
+    Example:
+        >>> _escape_sql_identifier('column"name')
+        '"column""name"'
     """
-
-
+    escaped = identifier.replace('"', '""')
+    return f'"{escaped}"'
+
+
+def _pl_tree_to_sql(tree: _ExpressionTree) -> str:
+    """Recursively convert a Polars expression tree (as JSON) to a SQL string.
+
     Parameters:
         tree (dict): JSON-deserialized expression tree from Polars
-
+
     Returns:
         str: SQL expression string
-
+
     Example:
         Input tree:
         {
@@ -80,35 +99,51 @@ def _pl_tree_to_sql(tree: dict) -> str:
         Output: "(foo > 5)"
     """
     [node_type] = tree.keys()
-    subtree = tree[node_type]

     if node_type == "BinaryExpr":
         # Binary expressions: left OP right
-
-
-
-
-
-
-
-            ")"
-        )
+        bin_expr_tree = tree[node_type]
+        assert isinstance(bin_expr_tree, dict), f"A {node_type} should be a dict but got {type(bin_expr_tree)}"
+        lhs, op, rhs = bin_expr_tree["left"], bin_expr_tree["op"], bin_expr_tree["right"]
+        assert isinstance(lhs, dict), f"LHS of a {node_type} should be a dict but got {type(lhs)}"
+        assert isinstance(op, str), f"The op of a {node_type} should be a str but got {type(op)}"
+        assert isinstance(rhs, dict), f"RHS of a {node_type} should be a dict but got {type(rhs)}"
+        return f"({_pl_tree_to_sql(lhs)} {_pl_operation_to_sql(op)} {_pl_tree_to_sql(rhs)})"
     if node_type == "Column":
         # A reference to a column name
-
+        # Wrap in quotes to handle special characters
+        col_name = tree[node_type]
+        assert isinstance(col_name, str), f"The col name of a {node_type} should be a str but got {type(col_name)}"
+        return _escape_sql_identifier(col_name)

     if node_type in ("Literal", "Dyn"):
         # Recursively process dynamic or literal values
-
+        val_tree = tree[node_type]
+        assert isinstance(val_tree, dict), f"A {node_type} should be a dict but got {type(val_tree)}"
+        return _pl_tree_to_sql(val_tree)

     if node_type == "Int":
         # Direct integer literals
-
+        int_literal = tree[node_type]
+        assert isinstance(int_literal, (int, str)), (
+            f"The value of an Int should be an int or str but got {type(int_literal)}"
+        )
+        return str(int_literal)

     if node_type == "Function":
         # Handle boolean functions like IsNull, IsNotNull
-
-
+        func_tree = tree[node_type]
+        assert isinstance(func_tree, dict), f"A {node_type} should be a dict but got {type(func_tree)}"
+        inputs = func_tree["input"]
+        assert isinstance(inputs, list), f"A {node_type} should have a list of dicts as input but got {type(inputs)}"
+        input_tree = inputs[0]
+        assert isinstance(input_tree, dict), (
+            f"A {node_type} should have a list of dicts as input but got {type(input_tree)}"
+        )
+        func_dict = func_tree["function"]
+        assert isinstance(func_dict, dict), (
+            f"A {node_type} should have a function dict as input but got {type(func_dict)}"
+        )

         if "Boolean" in func_dict:
             func = func_dict["Boolean"]
@@ -118,91 +153,119 @@ def _pl_tree_to_sql(tree: dict) -> str:
                 return f"({arg_sql} IS NULL)"
             if func == "IsNotNull":
                 return f"({arg_sql} IS NOT NULL)"
-
+            msg = f"Boolean function not supported: {func}"
+            raise NotImplementedError(msg)

-
+        msg = f"Unsupported function type: {func_dict}"
+        raise NotImplementedError(msg)

     if node_type == "Scalar":
         # Detect format: old style (dtype/value) or new style (direct type key)
-
-
-
+        scalar_tree = tree[node_type]
+        assert isinstance(scalar_tree, dict), f"A {node_type} should be a dict but got {type(scalar_tree)}"
+        if "dtype" in scalar_tree and "value" in scalar_tree:
+            dtype = str(scalar_tree["dtype"])
+            value = scalar_tree["value"]
         else:
             # New style: dtype is the single key in the dict
-            dtype = next(iter(
-            value =
+            dtype = next(iter(scalar_tree.keys()))
+            value = scalar_tree
+        assert isinstance(dtype, str), f"A {node_type} should have a str dtype but got {type(dtype)}"
+        assert isinstance(value, dict), f"A {node_type} should have a dict value but got {type(value)}"

         # Decimal support
         if dtype.startswith("{'Decimal'") or dtype == "Decimal":
-            decimal_value = value[
-
-
+            decimal_value = value["Decimal"]
+            assert isinstance(decimal_value, list), (
+                f"A {dtype} should be a two or three member list but got {type(decimal_value)}"
+            )
+            assert 2 <= len(decimal_value) <= 3, (
+                f"A {dtype} should be a two or three member list but got {len(decimal_value)} member list"
+            )
+            return str(Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[-1]))

         # Datetime with microseconds since epoch
         if dtype.startswith("{'Datetime'") or dtype == "Datetime":
-            micros = value[
-
-
+            micros = value["Datetime"]
+            assert isinstance(micros, list), f"A {dtype} should be a one member list but got {type(micros)}"
+            dt_timestamp = datetime.datetime.fromtimestamp(micros[0] / 1_000_000, tz=datetime.timezone.utc)
+            return f"'{dt_timestamp!s}'::TIMESTAMP"

         # Match simple numeric/boolean types
-        if dtype in (
-
-
+        if dtype in (
+            "Int8",
+            "Int16",
+            "Int32",
+            "Int64",
+            "UInt8",
+            "UInt16",
+            "UInt32",
+            "UInt64",
+            "Float32",
+            "Float64",
+            "Boolean",
+        ):
             return str(value[dtype])

         # Time type
         if dtype == "Time":
             nanoseconds = value["Time"]
+            assert isinstance(nanoseconds, int), f"A {dtype} should be an int but got {type(nanoseconds)}"
             seconds = nanoseconds // 1_000_000_000
             microseconds = (nanoseconds % 1_000_000_000) // 1_000
-            dt_time = (datetime.datetime.min + datetime.timedelta(
-                seconds=seconds, microseconds=microseconds
-            )).time()
+            dt_time = (datetime.datetime.min + datetime.timedelta(seconds=seconds, microseconds=microseconds)).time()
             return f"'{dt_time}'::TIME"

         # Date type
         if dtype == "Date":
             days_since_epoch = value["Date"]
+            assert isinstance(days_since_epoch, (float, int)), (
+                f"A {dtype} should be a number but got {type(days_since_epoch)}"
+            )
             date = datetime.date(1970, 1, 1) + datetime.timedelta(days=days_since_epoch)
             return f"'{date}'::DATE"

         # Binary type
         if dtype == "Binary":
-
-
+            bin_value = value["Binary"]
+            assert isinstance(bin_value, list), f"A {dtype} should be a list but got {type(bin_value)}"
+            binary_data = bytes(bin_value)
+            escaped = "".join(f"\\x{b:02x}" for b in binary_data)
             return f"'{escaped}'::BLOB"

         # String type
         if dtype == "String" or dtype == "StringOwned":
             # Some new formats may store directly under StringOwned
-            string_val = value.get("StringOwned", value.get("String", None))
+            string_val: object | None = value.get("StringOwned", value.get("String", None))
             return f"'{string_val}'"

+        msg = f"Unsupported scalar type {dtype!s}, with value {value}"
+        raise NotImplementedError(msg)

-
+    msg = f"Node type: {node_type} is not implemented. {tree[node_type]}"
+    raise NotImplementedError(msg)

-    raise NotImplementedError(f"Node type: {node_type} is not implemented. {subtree}")

 def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -> pl.LazyFrame:
-    """
-
-
+    """A polars IO plugin for DuckDB."""
+
     def source_generator(
-        with_columns:
-        predicate:
-        n_rows:
-        batch_size:
+        with_columns: list[str] | None,
+        predicate: pl.Expr | None,
+        n_rows: int | None,
+        batch_size: int | None,
     ) -> Iterator[pl.DataFrame]:
         duck_predicate = None
         relation_final = relation
         if with_columns is not None:
-            cols = ",".join(with_columns)
+            cols = ",".join(map(_escape_sql_identifier, with_columns))
             relation_final = relation_final.project(cols)
         if n_rows is not None:
             relation_final = relation_final.limit(n_rows)
         if predicate is not None:
             # We have a predicate, if possible, we push it down to DuckDB
-
+            with contextlib.suppress(AssertionError, KeyError):
+                duck_predicate = _predicate_to_expression(predicate)
             # Try to pushdown filter, if one exists
             if duck_predicate is not None:
                 relation_final = relation_final.filter(duck_predicate)
@@ -210,16 +273,12 @@ def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -> pl.LazyFrame:
             results = relation_final.fetch_arrow_reader()
         else:
             results = relation_final.fetch_arrow_reader(batch_size)
-
-
-
-
-
-
-
-                else:
-                    yield pl.from_arrow(record_batch)
-            except StopIteration:
-                break
+
+        for record_batch in iter(results.read_next_batch, None):
+            if predicate is not None and duck_predicate is None:
+                # We have a predicate, but did not manage to push it down, we fallback here
+                yield pl.from_arrow(record_batch).filter(predicate)  # type: ignore[arg-type,misc,unused-ignore]
+            else:
+                yield pl.from_arrow(record_batch)  # type: ignore[misc,unused-ignore]

     return register_io_source(source_generator, schema=schema)
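Taken together, duckdb_source registers a Polars IO plugin whose generator pushes column projection, row limits and, when the expression tree can be translated by _pl_tree_to_sql, filter predicates down into DuckDB. A hedged usage sketch, assuming the new wheel plus a Polars version that provides pl.Schema and register_io_source; the query and column name are illustrative:

import duckdb
import polars as pl

from duckdb.polars_io import duckdb_source

rel = duckdb.sql("SELECT range AS foo FROM range(1000)")
lf = duckdb_source(rel, schema=pl.Schema({"foo": pl.Int64()}))

# When the conversion succeeds, the Polars filter below becomes the SQL string
# ("foo" > 5) and is applied via relation.filter() before Arrow batches are read;
# otherwise the plugin falls back to filtering each batch in Polars.
print(lf.filter(pl.col("foo") > 5).head(3).collect())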