duckdb 1.4.1.dev135__cp312-cp312-macosx_10_13_universal2.whl → 1.5.0.dev44__cp312-cp312-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of duckdb might be problematic.
- _duckdb.cpython-312-darwin.so +0 -0
- duckdb/__init__.py +435 -341
- duckdb/__init__.pyi +713 -0
- duckdb/bytes_io_wrapper.py +9 -12
- duckdb/experimental/__init__.py +1 -2
- duckdb/experimental/spark/__init__.py +4 -3
- duckdb/experimental/spark/_globals.py +8 -8
- duckdb/experimental/spark/_typing.py +9 -7
- duckdb/experimental/spark/conf.py +15 -16
- duckdb/experimental/spark/context.py +44 -60
- duckdb/experimental/spark/errors/__init__.py +35 -33
- duckdb/experimental/spark/errors/error_classes.py +1 -1
- duckdb/experimental/spark/errors/exceptions/__init__.py +1 -1
- duckdb/experimental/spark/errors/exceptions/base.py +88 -39
- duckdb/experimental/spark/errors/utils.py +16 -11
- duckdb/experimental/spark/exception.py +6 -9
- duckdb/experimental/spark/sql/__init__.py +5 -5
- duckdb/experimental/spark/sql/_typing.py +15 -8
- duckdb/experimental/spark/sql/catalog.py +20 -21
- duckdb/experimental/spark/sql/column.py +55 -48
- duckdb/experimental/spark/sql/conf.py +8 -9
- duckdb/experimental/spark/sql/dataframe.py +233 -185
- duckdb/experimental/spark/sql/functions.py +1248 -1222
- duckdb/experimental/spark/sql/group.py +52 -56
- duckdb/experimental/spark/sql/readwriter.py +94 -80
- duckdb/experimental/spark/sql/session.py +59 -64
- duckdb/experimental/spark/sql/streaming.py +10 -9
- duckdb/experimental/spark/sql/type_utils.py +65 -67
- duckdb/experimental/spark/sql/types.py +345 -309
- duckdb/experimental/spark/sql/udf.py +6 -6
- duckdb/filesystem.py +16 -26
- duckdb/functional/__init__.py +16 -12
- duckdb/functional/__init__.pyi +31 -0
- duckdb/polars_io.py +82 -124
- duckdb/query_graph/__main__.py +96 -91
- duckdb/typing/__init__.py +8 -18
- duckdb/typing/__init__.pyi +36 -0
- duckdb/udf.py +5 -10
- duckdb/value/__init__.py +0 -1
- duckdb/value/constant/__init__.py +60 -62
- duckdb/value/constant/__init__.pyi +115 -0
- duckdb-1.5.0.dev44.dist-info/METADATA +80 -0
- duckdb-1.5.0.dev44.dist-info/RECORD +47 -0
- _duckdb-stubs/__init__.pyi +0 -1443
- _duckdb-stubs/_func.pyi +0 -46
- _duckdb-stubs/_sqltypes.pyi +0 -75
- adbc_driver_duckdb/__init__.py +0 -50
- adbc_driver_duckdb/dbapi.py +0 -115
- duckdb/_dbapi_type_object.py +0 -231
- duckdb/_version.py +0 -22
- duckdb/func/__init__.py +0 -3
- duckdb/sqltypes/__init__.py +0 -63
- duckdb-1.4.1.dev135.dist-info/METADATA +0 -326
- duckdb-1.4.1.dev135.dist-info/RECORD +0 -52
- /duckdb/{py.typed → value/__init__.pyi} +0 -0
- {duckdb-1.4.1.dev135.dist-info → duckdb-1.5.0.dev44.dist-info}/WHEEL +0 -0
- {duckdb-1.4.1.dev135.dist-info → duckdb-1.5.0.dev44.dist-info}/licenses/LICENSE +0 -0
duckdb/experimental/spark/sql/udf.py CHANGED

@@ -1,4 +1,4 @@
-# https://sparkbyexamples.com/pyspark/pyspark-udf-user-defined-function/
+# https://sparkbyexamples.com/pyspark/pyspark-udf-user-defined-function/
 from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union

 from .types import DataType
@@ -10,11 +10,11 @@ DataTypeOrString = Union[DataType, str]
 UserDefinedFunctionLike = TypeVar("UserDefinedFunctionLike")


-class UDFRegistration:
-    def __init__(self, sparkSession: "SparkSession")
+class UDFRegistration:
+    def __init__(self, sparkSession: "SparkSession"):
         self.sparkSession = sparkSession

-    def register(
+    def register(
         self,
         name: str,
         f: Union[Callable[..., Any], "UserDefinedFunctionLike"],
@@ -22,7 +22,7 @@ class UDFRegistration: # noqa: D101
     ) -> "UserDefinedFunctionLike":
         self.sparkSession.conn.create_function(name, f, return_type=returnType)

-    def registerJavaFunction(
+    def registerJavaFunction(
         self,
         name: str,
         javaClassName: str,
@@ -30,7 +30,7 @@ class UDFRegistration: # noqa: D101
     ) -> None:
         raise NotImplementedError

-    def registerJavaUDAF(self, name: str, javaClassName: str) -> None:
+    def registerJavaUDAF(self, name: str, javaClassName: str) -> None:
         raise NotImplementedError


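The registration shim above forwards straight to DuckDB's native create_function. A minimal sketch of that path (illustrative only: the builder/`udf` attribute names follow the PySpark-style API that duckdb.experimental.spark mimics, and the "INTEGER" return type string is an assumption):

    from duckdb.experimental.spark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()

    def plus_one(x: int) -> int:
        return x + 1

    # register() forwards to spark.conn.create_function(name, f, return_type=returnType)
    spark.udf.register("plus_one", plus_one, returnType="INTEGER")
    spark.sql("SELECT plus_one(41) AS answer").show()
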
duckdb/filesystem.py CHANGED

@@ -1,33 +1,23 @@
-
-
-Warning: Not for external use. May change at any moment. Likely to be made internal.
-"""
-
-from __future__ import annotations
-
-import io
-import typing
-
-from fsspec import AbstractFileSystem
-from fsspec.implementations.memory import MemoryFile, MemoryFileSystem
-
+from fsspec import filesystem, AbstractFileSystem
+from fsspec.implementations.memory import MemoryFileSystem, MemoryFile
 from .bytes_io_wrapper import BytesIOWrapper
+from io import TextIOBase

+def is_file_like(obj):
+    # We only care that we can read from the file
+    return hasattr(obj, "read") and hasattr(obj, "seek")

-class ModifiedMemoryFileSystem(MemoryFileSystem):
-    """In-memory filesystem implementation that uses its own protocol."""

-
+class ModifiedMemoryFileSystem(MemoryFileSystem):
+    protocol = ('DUCKDB_INTERNAL_OBJECTSTORE',)
     # defer to the original implementation that doesn't hardcode the protocol
-    _strip_protocol
+    _strip_protocol = classmethod(AbstractFileSystem._strip_protocol.__func__)

-    def add_file(self,
-
-
-            msg = "Can not read from a non file-like object"
-            raise TypeError(msg)
-        if isinstance(obj, io.TextIOBase):
-            # Wrap this so that we can return a bytes object from 'read'
-            obj = BytesIOWrapper(obj)
+    def add_file(self, object, path):
+        if not is_file_like(object):
+            raise ValueError("Can not read from a non file-like object")
         path = self._strip_protocol(path)
-
+        if isinstance(object, TextIOBase):
+            # Wrap this so that we can return a bytes object from 'read'
+            object = BytesIOWrapper(object)
+        self.store[path] = MemoryFile(self, path, object.read())
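For context, ModifiedMemoryFileSystem is the internal fsspec shim DuckDB uses to expose in-memory file-like objects under its own protocol. A rough sketch of what add_file does with a text handle (internal API, illustrative only; the store key shown assumes _strip_protocol leaves a plain relative path unchanged):

    from io import StringIO
    from duckdb.filesystem import ModifiedMemoryFileSystem

    fs = ModifiedMemoryFileSystem()
    handle = StringIO("a,b\n1,2\n")       # a TextIOBase, so add_file wraps it in BytesIOWrapper
    fs.add_file(handle, "example.csv")    # stored as an fsspec MemoryFile holding the bytes
    print(fs.store["example.csv"].getvalue())  # b'a,b\n1,2\n'
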
duckdb/functional/__init__.py CHANGED

@@ -1,13 +1,17 @@
-
-
-
-
-
-
-
-
-warnings.warn(
-    "`duckdb.functional` is deprecated and will be removed in a future version. Please use `duckdb.func` instead.",
-    DeprecationWarning,
-    stacklevel=2,
+from _duckdb.functional import (
+    FunctionNullHandling,
+    PythonUDFType,
+    SPECIAL,
+    DEFAULT,
+    NATIVE,
+    ARROW
 )
+
+__all__ = [
+    "FunctionNullHandling",
+    "PythonUDFType",
+    "SPECIAL",
+    "DEFAULT",
+    "NATIVE",
+    "ARROW"
+]
duckdb/functional/__init__.pyi ADDED

@@ -0,0 +1,31 @@
+from typing import Dict
+
+SPECIAL: FunctionNullHandling
+DEFAULT: FunctionNullHandling
+
+NATIVE: PythonUDFType
+ARROW: PythonUDFType
+
+class FunctionNullHandling:
+    DEFAULT: FunctionNullHandling
+    SPECIAL: FunctionNullHandling
+    def __int__(self) -> int: ...
+    def __index__(self) -> int: ...
+    @property
+    def __members__(self) -> Dict[str, FunctionNullHandling]: ...
+    @property
+    def name(self) -> str: ...
+    @property
+    def value(self) -> int: ...
+
+class PythonUDFType:
+    NATIVE: PythonUDFType
+    ARROW: PythonUDFType
+    def __int__(self) -> int: ...
+    def __index__(self) -> int: ...
+    @property
+    def __members__(self) -> Dict[str, PythonUDFType]: ...
+    @property
+    def name(self) -> str: ...
+    @property
+    def value(self) -> int: ...
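These re-exported enums are the values duckdb.create_function accepts for its UDF options. A small sketch of typical usage (assuming the standard type/null_handling keyword names of the Python API and type inference from annotations):

    import duckdb
    from duckdb.functional import PythonUDFType, FunctionNullHandling

    def shout(s: str) -> str:
        # With SPECIAL null handling the UDF receives NULLs instead of being skipped
        return (s + "!") if s is not None else "(null)!"

    con = duckdb.connect()
    con.create_function(
        "shout",
        shout,
        type=PythonUDFType.NATIVE,                  # row-at-a-time Python UDF (vs. ARROW)
        null_handling=FunctionNullHandling.SPECIAL,
    )
    print(con.sql("SELECT shout('hi')").fetchall())
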
duckdb/polars_io.py CHANGED

@@ -1,29 +1,20 @@
-
+import duckdb
+import polars as pl
+from typing import Iterator, Optional

-import
+from polars.io.plugins import register_io_source
+from duckdb import SQLExpression
 import json
-import typing
 from decimal import Decimal
+import datetime

-
-
-
-
-
-if typing.TYPE_CHECKING:
-    from collections.abc import Iterator
-
-    import typing_extensions
-
-    _ExpressionTree: typing_extensions.TypeAlias = typing.Dict[str, typing.Union[str, int, "_ExpressionTree", typing.Any]]  # noqa: UP006
-
-
-def _predicate_to_expression(predicate: pl.Expr) -> duckdb.Expression | None:
-    """Convert a Polars predicate expression to a DuckDB-compatible SQL expression.
-
+def _predicate_to_expression(predicate: pl.Expr) -> Optional[SQLExpression]:
+    """
+    Convert a Polars predicate expression to a DuckDB-compatible SQL expression.
+
     Parameters:
         predicate (pl.Expr): A Polars expression (e.g., col("foo") > 5)
-
+
     Returns:
         SQLExpression: A DuckDB SQL expression string equivalent.
         None: If conversion fails.
@@ -34,19 +25,20 @@ def _predicate_to_expression(predicate: pl.Expr) -> duckdb.Expression | None:
     """
     # Serialize the Polars expression tree to JSON
     tree = json.loads(predicate.meta.serialize(format="json"))
-
+
     try:
         # Convert the tree to SQL
         sql_filter = _pl_tree_to_sql(tree)
-        return
-    except
+        return SQLExpression(sql_filter)
+    except:
         # If the conversion fails, we return None
         return None


 def _pl_operation_to_sql(op: str) -> str:
-    """
-
+    """
+    Map Polars binary operation strings to SQL equivalents.
+
     Example:
         >>> _pl_operation_to_sql("Eq")
         '='
@@ -63,11 +55,12 @@ def _pl_operation_to_sql(op: str) -> str:
             "Or": "OR",
         }[op]
     except KeyError:
-        raise NotImplementedError(op)
+        raise NotImplementedError(op)


 def _escape_sql_identifier(identifier: str) -> str:
-    """
+    """
+    Escape SQL identifiers by doubling any double quotes and wrapping in double quotes.

     Example:
         >>> _escape_sql_identifier('column"name')
@@ -77,15 +70,16 @@ def _escape_sql_identifier(identifier: str) -> str:
     return f'"{escaped}"'


-def _pl_tree_to_sql(tree:
-    """
-
+def _pl_tree_to_sql(tree: dict) -> str:
+    """
+    Recursively convert a Polars expression tree (as JSON) to a SQL string.
+
     Parameters:
         tree (dict): JSON-deserialized expression tree from Polars
-
+
     Returns:
         str: SQL expression string
-
+
     Example:
         Input tree:
         {
@@ -98,51 +92,36 @@ def _pl_tree_to_sql(tree: _ExpressionTree) -> str:
         Output: "(foo > 5)"
     """
     [node_type] = tree.keys()
+    subtree = tree[node_type]

     if node_type == "BinaryExpr":
         # Binary expressions: left OP right
-
-
-
-
-
-
-
+        return (
+            "(" +
+            " ".join((
+                _pl_tree_to_sql(subtree['left']),
+                _pl_operation_to_sql(subtree['op']),
+                _pl_tree_to_sql(subtree['right'])
+            )) +
+            ")"
+        )
     if node_type == "Column":
         # A reference to a column name
         # Wrap in quotes to handle special characters
-
-        assert isinstance(col_name, str), f"The col name of a {node_type} should be a str but got {type(col_name)}"
-        return _escape_sql_identifier(col_name)
+        return _escape_sql_identifier(subtree)

     if node_type in ("Literal", "Dyn"):
         # Recursively process dynamic or literal values
-
-        assert isinstance(val_tree, dict), f"A {node_type} should be a dict but got {type(val_tree)}"
-        return _pl_tree_to_sql(val_tree)
+        return _pl_tree_to_sql(subtree)

     if node_type == "Int":
         # Direct integer literals
-
-        assert isinstance(int_literal, (int, str)), (
-            f"The value of an Int should be an int or str but got {type(int_literal)}"
-        )
-        return str(int_literal)
+        return str(subtree)

     if node_type == "Function":
         # Handle boolean functions like IsNull, IsNotNull
-
-
-        inputs = func_tree["input"]
-        assert isinstance(inputs, list), f"A {node_type} should have a list of dicts as input but got {type(inputs)}"
-        input_tree = inputs[0]
-        assert isinstance(input_tree, dict), (
-            f"A {node_type} should have a list of dicts as input but got {type(input_tree)}"
-        )
-        func_dict = func_tree["function"]
-        assert isinstance(func_dict, dict), (
-            f"A {node_type} should have a function dict as input but got {type(func_dict)}"
-        )
+        inputs = subtree["input"]
+        func_dict = subtree["function"]

         if "Boolean" in func_dict:
             func = func_dict["Boolean"]
@@ -152,104 +131,80 @@ def _pl_tree_to_sql(tree: _ExpressionTree) -> str:
                 return f"({arg_sql} IS NULL)"
             if func == "IsNotNull":
                 return f"({arg_sql} IS NOT NULL)"
-
-            raise NotImplementedError(msg)
+            raise NotImplementedError(f"Boolean function not supported: {func}")

-
-        raise NotImplementedError(msg)
+        raise NotImplementedError(f"Unsupported function type: {func_dict}")

     if node_type == "Scalar":
         # Detect format: old style (dtype/value) or new style (direct type key)
-
-
-
-            dtype = str(scalar_tree["dtype"])
-            value = scalar_tree["value"]
+        if "dtype" in subtree and "value" in subtree:
+            dtype = str(subtree["dtype"])
+            value = subtree["value"]
         else:
             # New style: dtype is the single key in the dict
-            dtype = next(iter(
-            value =
-        assert isinstance(dtype, str), f"A {node_type} should have a str dtype but got {type(dtype)}"
-        assert isinstance(value, dict), f"A {node_type} should have a dict value but got {type(value)}"
+            dtype = next(iter(subtree.keys()))
+            value = subtree

         # Decimal support
         if dtype.startswith("{'Decimal'") or dtype == "Decimal":
-            decimal_value = value[
-
-
-            )
-            return str(Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[1]))
+            decimal_value = value['Decimal']
+            decimal_value = Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[1])
+            return str(decimal_value)

         # Datetime with microseconds since epoch
         if dtype.startswith("{'Datetime'") or dtype == "Datetime":
-            micros = value[
-
-
-            return f"'{dt_timestamp!s}'::TIMESTAMP"
+            micros = value['Datetime'][0]
+            dt_timestamp = datetime.datetime.fromtimestamp(micros / 1_000_000, tz=datetime.UTC)
+            return f"'{str(dt_timestamp)}'::TIMESTAMP"

         # Match simple numeric/boolean types
-        if dtype in (
-
-
-            "Int32",
-            "Int64",
-            "UInt8",
-            "UInt16",
-            "UInt32",
-            "UInt64",
-            "Float32",
-            "Float64",
-            "Boolean",
-        ):
+        if dtype in ("Int8", "Int16", "Int32", "Int64",
+                     "UInt8", "UInt16", "UInt32", "UInt64",
+                     "Float32", "Float64", "Boolean"):
             return str(value[dtype])

         # Time type
         if dtype == "Time":
             nanoseconds = value["Time"]
-            assert isinstance(nanoseconds, int), f"A {dtype} should be an int but got {type(nanoseconds)}"
             seconds = nanoseconds // 1_000_000_000
             microseconds = (nanoseconds % 1_000_000_000) // 1_000
-            dt_time = (datetime.datetime.min + datetime.timedelta(
+            dt_time = (datetime.datetime.min + datetime.timedelta(
+                seconds=seconds, microseconds=microseconds
+            )).time()
             return f"'{dt_time}'::TIME"

         # Date type
         if dtype == "Date":
             days_since_epoch = value["Date"]
-            assert isinstance(days_since_epoch, (float, int)), (
-                f"A {dtype} should be a number but got {type(days_since_epoch)}"
-            )
             date = datetime.date(1970, 1, 1) + datetime.timedelta(days=days_since_epoch)
             return f"'{date}'::DATE"

         # Binary type
         if dtype == "Binary":
-
-
-            binary_data = bytes(bin_value)
-            escaped = "".join(f"\\x{b:02x}" for b in binary_data)
+            binary_data = bytes(value["Binary"])
+            escaped = ''.join(f'\\x{b:02x}' for b in binary_data)
             return f"'{escaped}'::BLOB"

         # String type
         if dtype == "String" or dtype == "StringOwned":
             # Some new formats may store directly under StringOwned
-            string_val
+            string_val = value.get("StringOwned", value.get("String", None))
             return f"'{string_val}'"

-        msg = f"Unsupported scalar type {dtype!s}, with value {value}"
-        raise NotImplementedError(msg)

-
-    raise NotImplementedError(msg)
+        raise NotImplementedError(f"Unsupported scalar type {str(dtype)}, with value {value}")

+    raise NotImplementedError(f"Node type: {node_type} is not implemented. {subtree}")

 def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -> pl.LazyFrame:
-    """
-
+    """
+    A polars IO plugin for DuckDB.
+    """
     def source_generator(
-        with_columns: list[str]
-        predicate: pl.Expr
-        n_rows: int
-        batch_size: int
+        with_columns: Optional[list[str]],
+        predicate: Optional[pl.Expr],
+        n_rows: Optional[int],
+        batch_size: Optional[int],
     ) -> Iterator[pl.DataFrame]:
         duck_predicate = None
         relation_final = relation
@@ -268,12 +223,15 @@ def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -> pl.LazyFrame:
             results = relation_final.fetch_arrow_reader()
         else:
             results = relation_final.fetch_arrow_reader(batch_size)
-
-
-
-
-
-
-
+        while True:
+            try:
+                record_batch = results.read_next_batch()
+                if predicate is not None and duck_predicate is None:
+                    # We have a predicate, but did not manage to push it down, we fallback here
+                    yield pl.from_arrow(record_batch).filter(predicate)
+                else:
+                    yield pl.from_arrow(record_batch)
+            except StopIteration:
+                break

     return register_io_source(source_generator, schema=schema)
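Taken together, polars_io.py wraps a DuckDB relation as a Polars IO plugin and tries to push Polars predicates down as DuckDB SQL, falling back to filtering in Polars when translation fails. A rough sketch of both entry points (illustrative; the pl.Schema construction and the "foo" column are assumptions, not taken from the diff):

    import duckdb
    import polars as pl
    from duckdb.polars_io import duckdb_source, _predicate_to_expression

    con = duckdb.connect()
    rel = con.sql("SELECT * FROM range(10) t(foo)")

    # Wrap the relation as a Polars LazyFrame backed by the IO plugin
    lf = duckdb_source(rel, pl.Schema({"foo": pl.Int64()}))
    print(lf.filter(pl.col("foo") > 5).collect())

    # The same predicate, translated to a DuckDB SQL expression (None if unsupported)
    print(_predicate_to_expression(pl.col("foo") > 5))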