duckdb 1.4.1__cp39-cp39-macosx_10_9_universal2.whl → 1.5.0.dev44__cp39-cp39-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of duckdb might be problematic.
- _duckdb.cpython-39-darwin.so +0 -0
- duckdb/__init__.py +435 -341
- duckdb/__init__.pyi +713 -0
- duckdb/bytes_io_wrapper.py +9 -12
- duckdb/experimental/__init__.py +1 -2
- duckdb/experimental/spark/__init__.py +4 -3
- duckdb/experimental/spark/_globals.py +8 -8
- duckdb/experimental/spark/_typing.py +9 -7
- duckdb/experimental/spark/conf.py +15 -16
- duckdb/experimental/spark/context.py +44 -60
- duckdb/experimental/spark/errors/__init__.py +35 -33
- duckdb/experimental/spark/errors/error_classes.py +1 -1
- duckdb/experimental/spark/errors/exceptions/__init__.py +1 -1
- duckdb/experimental/spark/errors/exceptions/base.py +88 -39
- duckdb/experimental/spark/errors/utils.py +16 -11
- duckdb/experimental/spark/exception.py +6 -9
- duckdb/experimental/spark/sql/__init__.py +5 -5
- duckdb/experimental/spark/sql/_typing.py +15 -8
- duckdb/experimental/spark/sql/catalog.py +20 -21
- duckdb/experimental/spark/sql/column.py +55 -48
- duckdb/experimental/spark/sql/conf.py +8 -9
- duckdb/experimental/spark/sql/dataframe.py +233 -185
- duckdb/experimental/spark/sql/functions.py +1248 -1222
- duckdb/experimental/spark/sql/group.py +52 -56
- duckdb/experimental/spark/sql/readwriter.py +94 -80
- duckdb/experimental/spark/sql/session.py +59 -64
- duckdb/experimental/spark/sql/streaming.py +10 -9
- duckdb/experimental/spark/sql/type_utils.py +65 -67
- duckdb/experimental/spark/sql/types.py +345 -309
- duckdb/experimental/spark/sql/udf.py +6 -6
- duckdb/filesystem.py +16 -26
- duckdb/functional/__init__.py +16 -12
- duckdb/functional/__init__.pyi +31 -0
- duckdb/polars_io.py +83 -130
- duckdb/query_graph/__main__.py +96 -91
- duckdb/typing/__init__.py +8 -18
- duckdb/typing/__init__.pyi +36 -0
- duckdb/udf.py +5 -10
- duckdb/value/__init__.py +0 -1
- duckdb/value/constant/__init__.py +60 -62
- duckdb/value/constant/__init__.pyi +115 -0
- duckdb-1.5.0.dev44.dist-info/METADATA +80 -0
- duckdb-1.5.0.dev44.dist-info/RECORD +47 -0
- _duckdb-stubs/__init__.pyi +0 -1443
- _duckdb-stubs/_func.pyi +0 -46
- _duckdb-stubs/_sqltypes.pyi +0 -75
- adbc_driver_duckdb/__init__.py +0 -50
- adbc_driver_duckdb/dbapi.py +0 -115
- duckdb/_dbapi_type_object.py +0 -231
- duckdb/_version.py +0 -22
- duckdb/func/__init__.py +0 -3
- duckdb/sqltypes/__init__.py +0 -63
- duckdb-1.4.1.dist-info/METADATA +0 -326
- duckdb-1.4.1.dist-info/RECORD +0 -52
- /duckdb/{py.typed → value/__init__.pyi} +0 -0
- {duckdb-1.4.1.dist-info → duckdb-1.5.0.dev44.dist-info}/WHEEL +0 -0
- {duckdb-1.4.1.dist-info → duckdb-1.5.0.dev44.dist-info}/licenses/LICENSE +0 -0
duckdb/experimental/spark/sql/udf.py
CHANGED

@@ -1,4 +1,4 @@
-# https://sparkbyexamples.com/pyspark/pyspark-udf-user-defined-function/
+# https://sparkbyexamples.com/pyspark/pyspark-udf-user-defined-function/
 from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
 
 from .types import DataType
@@ -10,11 +10,11 @@ DataTypeOrString = Union[DataType, str]
 UserDefinedFunctionLike = TypeVar("UserDefinedFunctionLike")
 
 
-class UDFRegistration:
-    def __init__(self, sparkSession: "SparkSession")
+class UDFRegistration:
+    def __init__(self, sparkSession: "SparkSession"):
         self.sparkSession = sparkSession
 
-    def register(
+    def register(
         self,
         name: str,
         f: Union[Callable[..., Any], "UserDefinedFunctionLike"],
@@ -22,7 +22,7 @@ class UDFRegistration: # noqa: D101
     ) -> "UserDefinedFunctionLike":
         self.sparkSession.conn.create_function(name, f, return_type=returnType)
 
-    def registerJavaFunction(
+    def registerJavaFunction(
         self,
         name: str,
         javaClassName: str,
@@ -30,7 +30,7 @@ class UDFRegistration: # noqa: D101
     ) -> None:
         raise NotImplementedError
 
-    def registerJavaUDAF(self, name: str, javaClassName: str) -> None:
+    def registerJavaUDAF(self, name: str, javaClassName: str) -> None:
         raise NotImplementedError
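For context, `UDFRegistration.register` above is a thin Spark-compatibility shim: it forwards straight to DuckDB's native `create_function` on the session's connection. A minimal sketch of the equivalent direct call (the function name and types are illustrative, not taken from this diff):

```python
import duckdb
from duckdb.typing import BIGINT

con = duckdb.connect()

# register(name, f, returnType=...) in the Spark shim boils down to a
# create_function call on the underlying connection, as the hunk above shows.
con.create_function("plus_one", lambda x: x + 1, [BIGINT], BIGINT)

print(con.sql("SELECT plus_one(41) AS answer").fetchall())  # [(42,)]
```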
duckdb/filesystem.py
CHANGED

@@ -1,33 +1,23 @@
-
-
-Warning: Not for external use. May change at any moment. Likely to be made internal.
-"""
-
-from __future__ import annotations
-
-import io
-import typing
-
-from fsspec import AbstractFileSystem
-from fsspec.implementations.memory import MemoryFile, MemoryFileSystem
-
+from fsspec import filesystem, AbstractFileSystem
+from fsspec.implementations.memory import MemoryFileSystem, MemoryFile
 from .bytes_io_wrapper import BytesIOWrapper
+from io import TextIOBase
 
+def is_file_like(obj):
+    # We only care that we can read from the file
+    return hasattr(obj, "read") and hasattr(obj, "seek")
 
-class ModifiedMemoryFileSystem(MemoryFileSystem):
-    """In-memory filesystem implementation that uses its own protocol."""
 
-
+class ModifiedMemoryFileSystem(MemoryFileSystem):
+    protocol = ('DUCKDB_INTERNAL_OBJECTSTORE',)
     # defer to the original implementation that doesn't hardcode the protocol
-    _strip_protocol
+    _strip_protocol = classmethod(AbstractFileSystem._strip_protocol.__func__)
 
-    def add_file(self,
-
-
-            msg = "Can not read from a non file-like object"
-            raise TypeError(msg)
-        if isinstance(obj, io.TextIOBase):
-            # Wrap this so that we can return a bytes object from 'read'
-            obj = BytesIOWrapper(obj)
+    def add_file(self, object, path):
+        if not is_file_like(object):
+            raise ValueError("Can not read from a non file-like object")
         path = self._strip_protocol(path)
-
+        if isinstance(object, TextIOBase):
+            # Wrap this so that we can return a bytes object from 'read'
+            object = BytesIOWrapper(object)
+        self.store[path] = MemoryFile(self, path, object.read())
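The rewritten `add_file` is what lets DuckDB read from arbitrary Python file-like objects: anything exposing `read`/`seek` is copied into the internal `DUCKDB_INTERNAL_OBJECTSTORE` memory filesystem, with text streams first wrapped in `BytesIOWrapper`. A rough sketch of a code path that exercises it, assuming the usual file-object support in `read_csv` (the internals routed through this module are an assumption):

```python
import io
import duckdb

# A text stream: add_file wraps it in BytesIOWrapper so read() yields bytes,
# then stores it as an fsspec MemoryFile keyed by the stripped path.
buf = io.StringIO("a,b\n1,2\n3,4\n")

print(duckdb.read_csv(buf).fetchall())  # [(1, 2), (3, 4)]
```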
duckdb/functional/__init__.py
CHANGED

@@ -1,13 +1,17 @@
-
-
-
-
-
-
-
-
-warnings.warn(
-    "`duckdb.functional` is deprecated and will be removed in a future version. Please use `duckdb.func` instead.",
-    DeprecationWarning,
-    stacklevel=2,
+from _duckdb.functional import (
+    FunctionNullHandling,
+    PythonUDFType,
+    SPECIAL,
+    DEFAULT,
+    NATIVE,
+    ARROW
 )
+
+__all__ = [
+    "FunctionNullHandling",
+    "PythonUDFType",
+    "SPECIAL",
+    "DEFAULT",
+    "NATIVE",
+    "ARROW"
+]
duckdb/functional/__init__.pyi
ADDED

@@ -0,0 +1,31 @@
+from typing import Dict
+
+SPECIAL: FunctionNullHandling
+DEFAULT: FunctionNullHandling
+
+NATIVE: PythonUDFType
+ARROW: PythonUDFType
+
+class FunctionNullHandling:
+    DEFAULT: FunctionNullHandling
+    SPECIAL: FunctionNullHandling
+    def __int__(self) -> int: ...
+    def __index__(self) -> int: ...
+    @property
+    def __members__(self) -> Dict[str, FunctionNullHandling]: ...
+    @property
+    def name(self) -> str: ...
+    @property
+    def value(self) -> int: ...
+
+class PythonUDFType:
+    NATIVE: PythonUDFType
+    ARROW: PythonUDFType
+    def __int__(self) -> int: ...
+    def __index__(self) -> int: ...
+    @property
+    def __members__(self) -> Dict[str, PythonUDFType]: ...
+    @property
+    def name(self) -> str: ...
+    @property
+    def value(self) -> int: ...
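`duckdb.functional` now simply re-exports the enums from the `_duckdb.functional` extension module, and the new stub declares them for type checkers. In user code they are typically passed to `create_function`; a minimal sketch, assuming the usual `type` and `null_handling` keyword arguments of `create_function` (the UDF itself is made up for illustration):

```python
import duckdb
from duckdb.functional import PythonUDFType, FunctionNullHandling
from duckdb.typing import BIGINT

def double_or_none(x):
    # With SPECIAL null handling the UDF receives NULLs instead of being skipped.
    return None if x is None else x * 2

con = duckdb.connect()
con.create_function(
    "double_or_none",
    double_or_none,
    [BIGINT],
    BIGINT,
    type=PythonUDFType.NATIVE,
    null_handling=FunctionNullHandling.SPECIAL,
)
print(con.sql("SELECT double_or_none(21), double_or_none(NULL)").fetchall())
```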
duckdb/polars_io.py
CHANGED

@@ -1,30 +1,20 @@
-
+import duckdb
+import polars as pl
+from typing import Iterator, Optional
 
-import
-import
+from polars.io.plugins import register_io_source
+from duckdb import SQLExpression
 import json
-import typing
 from decimal import Decimal
+import datetime
 
-
-
-
-
-
-if typing.TYPE_CHECKING:
-    from collections.abc import Iterator
-
-    import typing_extensions
-
-    _ExpressionTree: typing_extensions.TypeAlias = typing.Dict[str, typing.Union[str, int, "_ExpressionTree", typing.Any]]  # noqa: UP006
-
-
-def _predicate_to_expression(predicate: pl.Expr) -> duckdb.Expression | None:
-    """Convert a Polars predicate expression to a DuckDB-compatible SQL expression.
-
+def _predicate_to_expression(predicate: pl.Expr) -> Optional[SQLExpression]:
+    """
+    Convert a Polars predicate expression to a DuckDB-compatible SQL expression.
+
     Parameters:
         predicate (pl.Expr): A Polars expression (e.g., col("foo") > 5)
-
+
     Returns:
         SQLExpression: A DuckDB SQL expression string equivalent.
         None: If conversion fails.
@@ -35,19 +25,20 @@ def _predicate_to_expression(predicate: pl.Expr) -> duckdb.Expression | None:
     """
     # Serialize the Polars expression tree to JSON
     tree = json.loads(predicate.meta.serialize(format="json"))
-
+
     try:
         # Convert the tree to SQL
         sql_filter = _pl_tree_to_sql(tree)
-        return
-    except
+        return SQLExpression(sql_filter)
+    except:
         # If the conversion fails, we return None
         return None
 
 
 def _pl_operation_to_sql(op: str) -> str:
-    """
-
+    """
+    Map Polars binary operation strings to SQL equivalents.
+
     Example:
         >>> _pl_operation_to_sql("Eq")
         '='
@@ -64,11 +55,12 @@ def _pl_operation_to_sql(op: str) -> str:
             "Or": "OR",
         }[op]
     except KeyError:
-        raise NotImplementedError(op)
+        raise NotImplementedError(op)
 
 
 def _escape_sql_identifier(identifier: str) -> str:
-    """
+    """
+    Escape SQL identifiers by doubling any double quotes and wrapping in double quotes.
 
     Example:
         >>> _escape_sql_identifier('column"name')
@@ -78,15 +70,16 @@ def _escape_sql_identifier(identifier: str) -> str:
     return f'"{escaped}"'
 
 
-def _pl_tree_to_sql(tree:
-    """
-
+def _pl_tree_to_sql(tree: dict) -> str:
+    """
+    Recursively convert a Polars expression tree (as JSON) to a SQL string.
+
     Parameters:
         tree (dict): JSON-deserialized expression tree from Polars
-
+
     Returns:
         str: SQL expression string
-
+
     Example:
         Input tree:
         {
@@ -99,51 +92,36 @@ def _pl_tree_to_sql(tree: _ExpressionTree) -> str:
         Output: "(foo > 5)"
     """
     [node_type] = tree.keys()
+    subtree = tree[node_type]
 
     if node_type == "BinaryExpr":
         # Binary expressions: left OP right
-
-
-
-
-
-
-
+        return (
+            "(" +
+            " ".join((
+                _pl_tree_to_sql(subtree['left']),
+                _pl_operation_to_sql(subtree['op']),
+                _pl_tree_to_sql(subtree['right'])
+            )) +
+            ")"
+        )
     if node_type == "Column":
         # A reference to a column name
         # Wrap in quotes to handle special characters
-
-        assert isinstance(col_name, str), f"The col name of a {node_type} should be a str but got {type(col_name)}"
-        return _escape_sql_identifier(col_name)
+        return _escape_sql_identifier(subtree)
 
     if node_type in ("Literal", "Dyn"):
         # Recursively process dynamic or literal values
-
-        assert isinstance(val_tree, dict), f"A {node_type} should be a dict but got {type(val_tree)}"
-        return _pl_tree_to_sql(val_tree)
+        return _pl_tree_to_sql(subtree)
 
     if node_type == "Int":
         # Direct integer literals
-
-        assert isinstance(int_literal, (int, str)), (
-            f"The value of an Int should be an int or str but got {type(int_literal)}"
-        )
-        return str(int_literal)
+        return str(subtree)
 
     if node_type == "Function":
         # Handle boolean functions like IsNull, IsNotNull
-
-
-        inputs = func_tree["input"]
-        assert isinstance(inputs, list), f"A {node_type} should have a list of dicts as input but got {type(inputs)}"
-        input_tree = inputs[0]
-        assert isinstance(input_tree, dict), (
-            f"A {node_type} should have a list of dicts as input but got {type(input_tree)}"
-        )
-        func_dict = func_tree["function"]
-        assert isinstance(func_dict, dict), (
-            f"A {node_type} should have a function dict as input but got {type(func_dict)}"
-        )
+        inputs = subtree["input"]
+        func_dict = subtree["function"]
 
         if "Boolean" in func_dict:
             func = func_dict["Boolean"]
@@ -153,107 +131,80 @@ def _pl_tree_to_sql(tree: _ExpressionTree) -> str:
                 return f"({arg_sql} IS NULL)"
             if func == "IsNotNull":
                 return f"({arg_sql} IS NOT NULL)"
-
-            raise NotImplementedError(msg)
+            raise NotImplementedError(f"Boolean function not supported: {func}")
 
-
-        raise NotImplementedError(msg)
+        raise NotImplementedError(f"Unsupported function type: {func_dict}")
 
     if node_type == "Scalar":
         # Detect format: old style (dtype/value) or new style (direct type key)
-
-
-
-            dtype = str(scalar_tree["dtype"])
-            value = scalar_tree["value"]
+        if "dtype" in subtree and "value" in subtree:
+            dtype = str(subtree["dtype"])
+            value = subtree["value"]
         else:
             # New style: dtype is the single key in the dict
-            dtype = next(iter(
-            value =
-            assert isinstance(dtype, str), f"A {node_type} should have a str dtype but got {type(dtype)}"
-            assert isinstance(value, dict), f"A {node_type} should have a dict value but got {type(value)}"
+            dtype = next(iter(subtree.keys()))
+            value = subtree
 
         # Decimal support
         if dtype.startswith("{'Decimal'") or dtype == "Decimal":
-            decimal_value = value[
-
-
-            )
-            assert 2 <= len(decimal_value) <= 3, (
-                f"A {dtype} should be a two or three member list but got {len(decimal_value)} member list"
-            )
-            return str(Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[-1]))
+            decimal_value = value['Decimal']
+            decimal_value = Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[1])
+            return str(decimal_value)
 
         # Datetime with microseconds since epoch
         if dtype.startswith("{'Datetime'") or dtype == "Datetime":
-            micros = value[
-
-
-            return f"'{dt_timestamp!s}'::TIMESTAMP"
+            micros = value['Datetime'][0]
+            dt_timestamp = datetime.datetime.fromtimestamp(micros / 1_000_000, tz=datetime.UTC)
+            return f"'{str(dt_timestamp)}'::TIMESTAMP"
 
         # Match simple numeric/boolean types
-        if dtype in (
-
-
-            "Int32",
-            "Int64",
-            "UInt8",
-            "UInt16",
-            "UInt32",
-            "UInt64",
-            "Float32",
-            "Float64",
-            "Boolean",
-        ):
+        if dtype in ("Int8", "Int16", "Int32", "Int64",
+                     "UInt8", "UInt16", "UInt32", "UInt64",
+                     "Float32", "Float64", "Boolean"):
            return str(value[dtype])
 
         # Time type
         if dtype == "Time":
            nanoseconds = value["Time"]
-            assert isinstance(nanoseconds, int), f"A {dtype} should be an int but got {type(nanoseconds)}"
            seconds = nanoseconds // 1_000_000_000
            microseconds = (nanoseconds % 1_000_000_000) // 1_000
-            dt_time = (datetime.datetime.min + datetime.timedelta(
+            dt_time = (datetime.datetime.min + datetime.timedelta(
+                seconds=seconds, microseconds=microseconds
+            )).time()
            return f"'{dt_time}'::TIME"
 
         # Date type
         if dtype == "Date":
            days_since_epoch = value["Date"]
-            assert isinstance(days_since_epoch, (float, int)), (
-                f"A {dtype} should be a number but got {type(days_since_epoch)}"
-            )
            date = datetime.date(1970, 1, 1) + datetime.timedelta(days=days_since_epoch)
            return f"'{date}'::DATE"
 
         # Binary type
        if dtype == "Binary":
-
-
-            binary_data = bytes(bin_value)
-            escaped = "".join(f"\\x{b:02x}" for b in binary_data)
+            binary_data = bytes(value["Binary"])
+            escaped = ''.join(f'\\x{b:02x}' for b in binary_data)
            return f"'{escaped}'::BLOB"
 
         # String type
        if dtype == "String" or dtype == "StringOwned":
            # Some new formats may store directly under StringOwned
-            string_val
+            string_val = value.get("StringOwned", value.get("String", None))
            return f"'{string_val}'"
 
-        msg = f"Unsupported scalar type {dtype!s}, with value {value}"
-        raise NotImplementedError(msg)
 
-
-        raise NotImplementedError(msg)
+        raise NotImplementedError(f"Unsupported scalar type {str(dtype)}, with value {value}")
 
+    raise NotImplementedError(f"Node type: {node_type} is not implemented. {subtree}")
 
 def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -> pl.LazyFrame:
-    """
-
+    """
+    A polars IO plugin for DuckDB.
+    """
     def source_generator(
-        with_columns: list[str]
-        predicate: pl.Expr
-        n_rows: int
-        batch_size: int
+        with_columns: Optional[list[str]],
+        predicate: Optional[pl.Expr],
+        n_rows: Optional[int],
+        batch_size: Optional[int],
     ) -> Iterator[pl.DataFrame]:
         duck_predicate = None
         relation_final = relation
@@ -264,8 +215,7 @@ def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -
            relation_final = relation_final.limit(n_rows)
         if predicate is not None:
            # We have a predicate, if possible, we push it down to DuckDB
-
-            duck_predicate = _predicate_to_expression(predicate)
+            duck_predicate = _predicate_to_expression(predicate)
         # Try to pushdown filter, if one exists
         if duck_predicate is not None:
            relation_final = relation_final.filter(duck_predicate)
@@ -273,12 +223,15 @@ def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -
            results = relation_final.fetch_arrow_reader()
         else:
            results = relation_final.fetch_arrow_reader(batch_size)
-
-
-
-
-
-
-
+        while True:
+            try:
+                record_batch = results.read_next_batch()
+                if predicate is not None and duck_predicate is None:
+                    # We have a predicate, but did not manage to push it down, we fallback here
+                    yield pl.from_arrow(record_batch).filter(predicate)
+                else:
+                    yield pl.from_arrow(record_batch)
+            except StopIteration:
+                break
 
     return register_io_source(source_generator, schema=schema)
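`duckdb_source` wires `source_generator` into polars' `register_io_source`, so a filter applied to the resulting LazyFrame is serialized by polars, translated to SQL by `_predicate_to_expression`, and pushed into the DuckDB relation when possible, with per-batch filtering in polars as the fallback. A small usage sketch, assuming a recent polars with `pl.Schema` and calling this internal module directly (the query and schema are illustrative):

```python
import duckdb
import polars as pl
from duckdb.polars_io import duckdb_source

rel = duckdb.sql("SELECT range AS i, range * 2 AS j FROM range(1000)")

# Build a LazyFrame backed by the relation; the schema must match the relation's columns.
lf = duckdb_source(rel, pl.Schema({"i": pl.Int64(), "j": pl.Int64()}))

# col("i") > 995 is converted to SQL and applied as a DuckDB-side filter when possible.
print(lf.filter(pl.col("i") > 995).collect())
```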