duckdb-0.8.2.dev3007-cp311-cp311-win_amd64.whl → duckdb-1.4.3.dev8-cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _duckdb-stubs/__init__.pyi +1478 -0
- _duckdb-stubs/_func.pyi +46 -0
- _duckdb-stubs/_sqltypes.pyi +75 -0
- duckdb/duckdb.cp311-win_amd64.pyd → _duckdb.cp311-win_amd64.pyd +0 -0
- adbc_driver_duckdb/__init__.py +10 -8
- adbc_driver_duckdb/dbapi.py +4 -5
- duckdb/__init__.py +250 -196
- duckdb/_dbapi_type_object.py +231 -0
- duckdb/_version.py +22 -0
- {pyduckdb → duckdb}/bytes_io_wrapper.py +12 -8
- duckdb/experimental/__init__.py +5 -0
- duckdb/experimental/spark/__init__.py +6 -0
- {pyduckdb → duckdb/experimental}/spark/_globals.py +8 -8
- duckdb/experimental/spark/_typing.py +46 -0
- duckdb/experimental/spark/conf.py +46 -0
- duckdb/experimental/spark/context.py +180 -0
- duckdb/experimental/spark/errors/__init__.py +70 -0
- duckdb/experimental/spark/errors/error_classes.py +918 -0
- duckdb/experimental/spark/errors/exceptions/__init__.py +16 -0
- duckdb/experimental/spark/errors/exceptions/base.py +168 -0
- duckdb/experimental/spark/errors/utils.py +111 -0
- duckdb/experimental/spark/exception.py +18 -0
- {pyduckdb → duckdb/experimental}/spark/sql/__init__.py +5 -5
- duckdb/experimental/spark/sql/_typing.py +86 -0
- duckdb/experimental/spark/sql/catalog.py +79 -0
- duckdb/experimental/spark/sql/column.py +361 -0
- duckdb/experimental/spark/sql/conf.py +24 -0
- duckdb/experimental/spark/sql/dataframe.py +1389 -0
- duckdb/experimental/spark/sql/functions.py +6195 -0
- duckdb/experimental/spark/sql/group.py +424 -0
- duckdb/experimental/spark/sql/readwriter.py +435 -0
- duckdb/experimental/spark/sql/session.py +297 -0
- duckdb/experimental/spark/sql/streaming.py +36 -0
- duckdb/experimental/spark/sql/type_utils.py +107 -0
- {pyduckdb → duckdb/experimental}/spark/sql/types.py +323 -342
- duckdb/experimental/spark/sql/udf.py +37 -0
- duckdb/filesystem.py +33 -0
- duckdb/func/__init__.py +3 -0
- duckdb/functional/__init__.py +12 -16
- duckdb/polars_io.py +284 -0
- duckdb/py.typed +0 -0
- duckdb/query_graph/__main__.py +358 -0
- duckdb/sqltypes/__init__.py +63 -0
- duckdb/typing/__init__.py +18 -6
- {pyduckdb → duckdb}/udf.py +10 -5
- duckdb/value/__init__.py +1 -0
- pyduckdb/value/constant.py → duckdb/value/constant/__init__.py +66 -57
- duckdb-1.4.3.dev8.dist-info/METADATA +88 -0
- duckdb-1.4.3.dev8.dist-info/RECORD +52 -0
- {duckdb-0.8.2.dev3007.dist-info → duckdb-1.4.3.dev8.dist-info}/WHEEL +1 -1
- duckdb-1.4.3.dev8.dist-info/licenses/LICENSE +7 -0
- duckdb-0.8.2.dev3007.dist-info/METADATA +0 -20
- duckdb-0.8.2.dev3007.dist-info/RECORD +0 -34
- duckdb-0.8.2.dev3007.dist-info/top_level.txt +0 -4
- duckdb-stubs/__init__.pyi +0 -574
- duckdb-stubs/functional/__init__.pyi +0 -33
- duckdb-stubs/typing/__init__.pyi +0 -35
- pyduckdb/__init__.py +0 -61
- pyduckdb/filesystem.py +0 -64
- pyduckdb/spark/__init__.py +0 -7
- pyduckdb/spark/conf.py +0 -45
- pyduckdb/spark/context.py +0 -162
- pyduckdb/spark/exception.py +0 -9
- pyduckdb/spark/sql/catalog.py +0 -78
- pyduckdb/spark/sql/conf.py +0 -23
- pyduckdb/spark/sql/dataframe.py +0 -75
- pyduckdb/spark/sql/readwriter.py +0 -180
- pyduckdb/spark/sql/session.py +0 -249
- pyduckdb/spark/sql/streaming.py +0 -37
- pyduckdb/spark/sql/type_utils.py +0 -104
- pyduckdb/spark/sql/udf.py +0 -9
- {pyduckdb → duckdb/experimental}/spark/LICENSE +0 -0
duckdb/_dbapi_type_object.py
ADDED
@@ -0,0 +1,231 @@
+"""DuckDB DB API 2.0 Type Objects Module.
+
+This module provides DB API 2.0 compliant type objects for DuckDB, allowing applications
+to check column types returned by queries against standard database API categories.
+
+Example:
+    >>> import duckdb
+    >>>
+    >>> conn = duckdb.connect()
+    >>> cursor = conn.cursor()
+    >>> cursor.execute("SELECT 'hello' as text_col, 42 as num_col, CURRENT_DATE as date_col")
+    >>>
+    >>> # Check column types using DB API type objects
+    >>> for i, desc in enumerate(cursor.description):
+    >>>     col_name, col_type = desc[0], desc[1]
+    >>>     if col_type == duckdb.STRING:
+    >>>         print(f"{col_name} is a string type")
+    >>>     elif col_type == duckdb.NUMBER:
+    >>>         print(f"{col_name} is a numeric type")
+    >>>     elif col_type == duckdb.DATETIME:
+    >>>         print(f"{col_name} is a date/time type")
+
+See Also:
+    - PEP 249: https://peps.python.org/pep-0249/
+    - DuckDB Type System: https://duckdb.org/docs/sql/data_types/overview
+"""
+
+from duckdb import sqltypes
+
+
+class DBAPITypeObject:
+    """DB API 2.0 type object for categorizing database column types.
+
+    This class implements the type objects defined in PEP 249 (DB API 2.0).
+    It allows checking whether a specific DuckDB type belongs to a broader
+    category like STRING, NUMBER, DATETIME, etc.
+
+    The type object supports equality comparison with DuckDBPyType instances,
+    returning True if the type belongs to this category.
+
+    Args:
+        types: A list of DuckDBPyType instances that belong to this type category.
+
+    Example:
+        >>> string_types = DBAPITypeObject([sqltypes.VARCHAR, sqltypes.CHAR])
+        >>> result = sqltypes.VARCHAR == string_types  # True
+        >>> result = sqltypes.INTEGER == string_types  # False
+
+    Note:
+        This follows the DB API 2.0 specification where type objects are compared
+        using equality operators rather than isinstance() checks.
+    """
+
+    def __init__(self, types: list[sqltypes.DuckDBPyType]) -> None:
+        """Initialize a DB API type object.
+
+        Args:
+            types: List of DuckDB types that belong to this category.
+        """
+        self.types = types
+
+    def __eq__(self, other: object) -> bool:
+        """Check if a DuckDB type belongs to this type category.
+
+        This method implements the DB API 2.0 type checking mechanism.
+        It returns True if the other object is a DuckDBPyType that
+        is contained in this type category.
+
+        Args:
+            other: The object to compare, typically a DuckDBPyType instance.
+
+        Returns:
+            True if other is a DuckDBPyType in this category, False otherwise.
+
+        Example:
+            >>> NUMBER == sqltypes.INTEGER  # True
+            >>> NUMBER == sqltypes.VARCHAR  # False
+        """
+        if isinstance(other, sqltypes.DuckDBPyType):
+            return other in self.types
+        return False
+
+    def __repr__(self) -> str:
+        """Return a string representation of this type object.
+
+        Returns:
+            A string showing the type object and its contained DuckDB types.
+
+        Example:
+            >>> repr(STRING)
+            '<DBAPITypeObject [VARCHAR]>'
+        """
+        return f"<DBAPITypeObject [{','.join(str(x) for x in self.types)}]>"
+
+
+# Define the standard DB API 2.0 type objects for DuckDB
+
+STRING = DBAPITypeObject([sqltypes.VARCHAR])
+"""
+STRING type object for text-based database columns.
+
+This type object represents all string/text types in DuckDB. Currently includes:
+- VARCHAR: Variable-length character strings
+
+Use this to check if a column contains textual data that should be handled
+as Python strings.
+
+DB API 2.0 Reference:
+    https://peps.python.org/pep-0249/#string
+
+Example:
+    >>> cursor.description[0][1] == STRING  # Check if first column is text
+"""
+
+NUMBER = DBAPITypeObject(
+    [
+        sqltypes.TINYINT,
+        sqltypes.UTINYINT,
+        sqltypes.SMALLINT,
+        sqltypes.USMALLINT,
+        sqltypes.INTEGER,
+        sqltypes.UINTEGER,
+        sqltypes.BIGINT,
+        sqltypes.UBIGINT,
+        sqltypes.HUGEINT,
+        sqltypes.UHUGEINT,
+        sqltypes.DuckDBPyType("BIGNUM"),
+        sqltypes.DuckDBPyType("DECIMAL"),
+        sqltypes.FLOAT,
+        sqltypes.DOUBLE,
+    ]
+)
+"""
+NUMBER type object for numeric database columns.
+
+This type object represents all numeric types in DuckDB, including:
+
+Integer Types:
+    - TINYINT, UTINYINT: 8-bit signed/unsigned integers
+    - SMALLINT, USMALLINT: 16-bit signed/unsigned integers
+    - INTEGER, UINTEGER: 32-bit signed/unsigned integers
+    - BIGINT, UBIGINT: 64-bit signed/unsigned integers
+    - HUGEINT, UHUGEINT: 128-bit signed/unsigned integers
+
+Decimal Types:
+    - BIGNUM: Arbitrary precision integers
+    - DECIMAL: Fixed-point decimal numbers
+
+Floating Point Types:
+    - FLOAT: 32-bit floating point
+    - DOUBLE: 64-bit floating point
+
+Use this to check if a column contains numeric data that should be handled
+as Python int, float, or Decimal objects.
+
+DB API 2.0 Reference:
+    https://peps.python.org/pep-0249/#number
+
+Example:
+    >>> cursor.description[1][1] == NUMBER  # Check if second column is numeric
+"""
+
+DATETIME = DBAPITypeObject(
+    [
+        sqltypes.DATE,
+        sqltypes.TIME,
+        sqltypes.TIME_TZ,
+        sqltypes.TIMESTAMP,
+        sqltypes.TIMESTAMP_TZ,
+        sqltypes.TIMESTAMP_NS,
+        sqltypes.TIMESTAMP_MS,
+        sqltypes.TIMESTAMP_S,
+    ]
+)
+"""
+DATETIME type object for date and time database columns.
+
+This type object represents all date/time types in DuckDB, including:
+
+Date Types:
+    - DATE: Calendar dates (year, month, day)
+
+Time Types:
+    - TIME: Time of day without timezone
+    - TIME_TZ: Time of day with timezone
+
+Timestamp Types:
+    - TIMESTAMP: Date and time without timezone (microsecond precision)
+    - TIMESTAMP_TZ: Date and time with timezone
+    - TIMESTAMP_NS: Nanosecond precision timestamps
+    - TIMESTAMP_MS: Millisecond precision timestamps
+    - TIMESTAMP_S: Second precision timestamps
+
+Use this to check if a column contains temporal data that should be handled
+as Python datetime, date, or time objects.
+
+DB API 2.0 Reference:
+    https://peps.python.org/pep-0249/#datetime
+
+Example:
+    >>> cursor.description[2][1] == DATETIME  # Check if third column is date/time
+"""
+
+BINARY = DBAPITypeObject([sqltypes.BLOB])
+"""
+BINARY type object for binary data database columns.
+
+This type object represents binary data types in DuckDB:
+- BLOB: Binary Large Objects for storing arbitrary binary data
+
+Use this to check if a column contains binary data that should be handled
+as Python bytes objects.
+
+DB API 2.0 Reference:
+    https://peps.python.org/pep-0249/#binary
+
+Example:
+    >>> cursor.description[3][1] == BINARY  # Check if fourth column is binary
+"""
+
+ROWID = None
+"""
+ROWID type object for row identifier columns.
+
+DB API 2.0 Reference:
+    https://peps.python.org/pep-0249/#rowid
+
+Note:
+    This will always be None for DuckDB connections. Applications should not
+    rely on ROWID functionality when using DuckDB.
+"""
duckdb/_version.py
ADDED
@@ -0,0 +1,22 @@
+# ----------------------------------------------------------------------
+# Version API
+#
+# We provide three symbols:
+# - duckdb.__version__: The version of this package
+# - duckdb.__duckdb_version__: The version of duckdb that is bundled
+# - duckdb.version(): A human-readable version string containing both of the above
+# ----------------------------------------------------------------------
+from importlib.metadata import version as _dist_version
+
+import _duckdb
+
+__version__: str = _dist_version("duckdb")
+"""Version of the DuckDB Python Package."""
+
+__duckdb_version__: str = _duckdb.__version__
+"""Version of DuckDB that is bundled."""
+
+
+def version() -> str:
+    """Human-friendly formatted version string of both the distribution package and the bundled DuckDB engine."""
+    return f"{__version__} (with duckdb {_duckdb.__version__})"
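The header comment states that all three symbols are exposed on the top-level duckdb module, so the version split between the Python distribution and the bundled engine can be inspected like this:

    import duckdb

    print(duckdb.__version__)         # version of the Python distribution, e.g. "1.4.3.dev8"
    print(duckdb.__duckdb_version__)  # version of the bundled DuckDB engine
    print(duckdb.version())           # "<package version> (with duckdb <engine version>)"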
{pyduckdb → duckdb}/bytes_io_wrapper.py
RENAMED
@@ -1,7 +1,5 @@
-
-from typing import Union
+"""StringIO buffer wrapper.
 
-"""
 BSD 3-Clause License
 
 Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
@@ -35,11 +33,17 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 """
 
+from io import StringIO, TextIOBase
+from typing import Any, Union
+
 
 class BytesIOWrapper:
-
-
-
+    """Wrapper that wraps a StringIO buffer and reads bytes from it.
+
+    Created for compat with pyarrow read_csv.
+    """
+
+    def __init__(self, buffer: Union[StringIO, TextIOBase], encoding: str = "utf-8") -> None:  # noqa: D107
         self.buffer = buffer
         self.encoding = encoding
         # Because a character can be represented by more than 1 byte,
@@ -48,10 +52,10 @@ class BytesIOWrapper:
         # overflow to the front of the bytestring the next time reading is performed
         self.overflow = b""
 
-    def __getattr__(self, attr: str):
+    def __getattr__(self, attr: str) -> Any:  # noqa: D105, ANN401
        return getattr(self.buffer, attr)
 
-    def read(self, n: Union[int, None] = -1) -> bytes:
+    def read(self, n: Union[int, None] = -1) -> bytes:  # noqa: D102
         assert self.buffer is not None
         bytestring = self.buffer.read(n).encode(self.encoding)
         # When n=-1/n greater than remaining bytes: Read entire file/rest of file
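The wrapper is an internal compatibility shim: it takes a text buffer and serves bytes, e.g. so pyarrow's CSV reader can consume a StringIO. A minimal sketch of the behaviour, assuming the module is importable as duckdb.bytes_io_wrapper per the file list above:

    from io import StringIO

    from duckdb.bytes_io_wrapper import BytesIOWrapper

    wrapped = BytesIOWrapper(StringIO("col\nvalue\n"), encoding="utf-8")
    print(wrapped.read(4))  # b'col\n' (text is encoded to bytes on the fly)
    print(wrapped.read())   # b'value\n' (n=-1 reads the rest of the buffer)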
duckdb/experimental/spark/__init__.py
ADDED
@@ -0,0 +1,6 @@
+from .conf import SparkConf  # noqa: D104
+from .context import SparkContext
+from .exception import ContributionsAcceptedError
+from .sql import DataFrame, SparkSession
+
+__all__ = ["ContributionsAcceptedError", "DataFrame", "SparkConf", "SparkContext", "SparkSession"]
{pyduckdb → duckdb/experimental}/spark/_globals.py
RENAMED
@@ -15,8 +15,7 @@
 # limitations under the License.
 #
 
-"""
-Module defining global singleton classes.
+"""Module defining global singleton classes.
 
 This module raises a RuntimeError if an attempt to reload it is made. In that
 way the identities of the classes defined here are fixed and will remain so
@@ -38,7 +37,8 @@ __ALL__ = ["_NoValue"]
 # Disallow reloading this module so as to preserve the identities of the
 # classes defined here.
 if "_is_loaded" in globals():
-
+    msg = "Reloading duckdb.experimental.spark._globals is not allowed"
+    raise RuntimeError(msg)
 _is_loaded = True
 
 
@@ -54,23 +54,23 @@ class _NoValueType:
 
     __instance = None
 
-    def __new__(cls):
+    def __new__(cls) -> "_NoValueType":
         # ensure that only one instance exists
         if not cls.__instance:
-            cls.__instance = super(
+            cls.__instance = super().__new__(cls)
         return cls.__instance
 
     # Make the _NoValue instance falsey
-    def __nonzero__(self):
+    def __nonzero__(self) -> bool:
         return False
 
     __bool__ = __nonzero__
 
     # needed for python 2 to preserve identity through a pickle
-    def __reduce__(self):
+    def __reduce__(self) -> tuple[type, tuple]:
         return (self.__class__, ())
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return "<no value>"
 
 
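_NoValue is the usual "no argument supplied" sentinel; __ALL__ above indicates the module exposes a _NoValue instance of the singleton type. A hypothetical helper, just to illustrate why the fixed identity and falsiness matter:

    from duckdb.experimental.spark._globals import _NoValue

    def set_option(value=_NoValue):
        # Hypothetical function for illustration: the sentinel lets it tell
        # "argument omitted" apart from an explicit None (or other falsey value).
        if value is _NoValue:
            return "nothing passed"
        return f"explicitly passed {value!r}"

    print(set_option())      # nothing passed
    print(set_option(None))  # explicitly passed None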
duckdb/experimental/spark/_typing.py
ADDED
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections.abc import Iterable, Sized
+from typing import Callable, TypeVar, Union
+
+from numpy import float32, float64, int32, int64, ndarray
+from typing_extensions import Literal, Protocol, Self
+
+F = TypeVar("F", bound=Callable)
+T_co = TypeVar("T_co", covariant=True)
+
+PrimitiveType = Union[bool, float, int, str]
+
+NonUDFType = Literal[0]
+
+
+class SupportsIAdd(Protocol):
+    def __iadd__(self, other: "SupportsIAdd") -> Self: ...
+
+
+class SupportsOrdering(Protocol):
+    def __lt__(self, other: "SupportsOrdering") -> bool: ...
+
+
+class SizedIterable(Protocol, Sized, Iterable[T_co]): ...
+
+
+S = TypeVar("S", bound=SupportsOrdering)
+
+NumberOrArray = TypeVar("NumberOrArray", float, int, complex, int32, int64, float32, float64, ndarray)
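These are structural (Protocol) types used to annotate the Spark-compatible API; for instance, S accepts any value whose type defines __lt__. A small sketch of how such a bound behaves, assuming numpy and typing_extensions are installed since the module imports both:

    from typing import TypeVar

    from duckdb.experimental.spark._typing import SupportsOrdering

    S2 = TypeVar("S2", bound=SupportsOrdering)  # mirrors the module's own S

    def smallest(a: S2, b: S2) -> S2:
        # int, str, datetime, ... all satisfy the protocol structurally via __lt__.
        return a if a < b else b

    print(smallest(3, 7))      # 3
    print(smallest("b", "a"))  # a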
duckdb/experimental/spark/conf.py
ADDED
@@ -0,0 +1,46 @@
+from typing import Optional  # noqa: D100
+
+from duckdb.experimental.spark.exception import ContributionsAcceptedError
+
+
+class SparkConf:  # noqa: D101
+    def __init__(self) -> None:  # noqa: D107
+        raise NotImplementedError
+
+    def contains(self, key: str) -> bool:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def get(self, key: str, defaultValue: Optional[str] = None) -> Optional[str]:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def getAll(self) -> list[tuple[str, str]]:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def set(self, key: str, value: str) -> "SparkConf":  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def setAll(self, pairs: list[tuple[str, str]]) -> "SparkConf":  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def setAppName(self, value: str) -> "SparkConf":  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def setExecutorEnv(  # noqa: D102
+        self, key: Optional[str] = None, value: Optional[str] = None, pairs: Optional[list[tuple[str, str]]] = None
+    ) -> "SparkConf":
+        raise ContributionsAcceptedError
+
+    def setIfMissing(self, key: str, value: str) -> "SparkConf":  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def setMaster(self, value: str) -> "SparkConf":  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def setSparkHome(self, value: str) -> "SparkConf":  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def toDebugString(self) -> str:  # noqa: D102
+        raise ContributionsAcceptedError
+
+
+__all__ = ["SparkConf"]
duckdb/experimental/spark/context.py
ADDED
@@ -0,0 +1,180 @@
+from typing import Optional  # noqa: D100
+
+import duckdb
+from duckdb import DuckDBPyConnection
+from duckdb.experimental.spark.conf import SparkConf
+from duckdb.experimental.spark.exception import ContributionsAcceptedError
+
+
+class SparkContext:  # noqa: D101
+    def __init__(self, master: str) -> None:  # noqa: D107
+        self._connection = duckdb.connect(":memory:")
+        # This aligns the null ordering with Spark.
+        self._connection.execute("set default_null_order='nulls_first_on_asc_last_on_desc'")
+
+    @property
+    def connection(self) -> DuckDBPyConnection:  # noqa: D102
+        return self._connection
+
+    def stop(self) -> None:  # noqa: D102
+        self._connection.close()
+
+    @classmethod
+    def getOrCreate(cls, conf: Optional[SparkConf] = None) -> "SparkContext":  # noqa: D102
+        raise ContributionsAcceptedError
+
+    @classmethod
+    def setSystemProperty(cls, key: str, value: str) -> None:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    @property
+    def applicationId(self) -> str:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    @property
+    def defaultMinPartitions(self) -> int:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    @property
+    def defaultParallelism(self) -> int:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    # @property
+    # def resources(self) -> Dict[str, ResourceInformation]:
+    #     raise ContributionsAcceptedError
+
+    @property
+    def startTime(self) -> str:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    @property
+    def uiWebUrl(self) -> str:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    @property
+    def version(self) -> str:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def __repr__(self) -> str:  # noqa: D105
+        raise ContributionsAcceptedError
+
+    # def accumulator(self, value: ~T, accum_param: Optional[ForwardRef('AccumulatorParam[T]')] = None
+    # ) -> 'Accumulator[T]':
+    #     pass
+
+    def addArchive(self, path: str) -> None:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def addFile(self, path: str, recursive: bool = False) -> None:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def addPyFile(self, path: str) -> None:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    # def binaryFiles(self, path: str, minPartitions: Optional[int] = None
+    # ) -> duckdb.experimental.spark.rdd.RDD[typing.Tuple[str, bytes]]:
+    #     pass
+
+    # def binaryRecords(self, path: str, recordLength: int) -> duckdb.experimental.spark.rdd.RDD[bytes]:
+    #     pass
+
+    # def broadcast(self, value: ~T) -> 'Broadcast[T]':
+    #     pass
+
+    def cancelAllJobs(self) -> None:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def cancelJobGroup(self, groupId: str) -> None:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def dump_profiles(self, path: str) -> None:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    # def emptyRDD(self) -> duckdb.experimental.spark.rdd.RDD[typing.Any]:
+    #     pass
+
+    def getCheckpointDir(self) -> Optional[str]:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def getConf(self) -> SparkConf:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def getLocalProperty(self, key: str) -> Optional[str]:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    # def hadoopFile(self, path: str, inputFormatClass: str, keyClass: str, valueClass: str,
+    # keyConverter: Optional[str] = None, valueConverter: Optional[str] = None,
+    # conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
+    #     pass
+
+    # def hadoopRDD(self, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None,
+    # valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0
+    # ) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
+    #     pass
+
+    # def newAPIHadoopFile(self, path: str, inputFormatClass: str, keyClass: str, valueClass: str,
+    # keyConverter: Optional[str] = None, valueConverter: Optional[str] = None,
+    # conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
+    #     pass
+
+    # def newAPIHadoopRDD(self, inputFormatClass: str, keyClass: str, valueClass: str,
+    # keyConverter: Optional[str] = None, valueConverter: Optional[str] = None,
+    # conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
+    #     pass
+
+    # def parallelize(self, c: Iterable[~T], numSlices: Optional[int] = None) -> pyspark.rdd.RDD[~T]:
+    #     pass
+
+    # def pickleFile(self, name: str, minPartitions: Optional[int] = None) -> pyspark.rdd.RDD[typing.Any]:
+    #     pass
+
+    # def range(self, start: int, end: Optional[int] = None, step: int = 1, numSlices: Optional[int] = None
+    # ) -> pyspark.rdd.RDD[int]:
+    #     pass
+
+    # def runJob(self, rdd: pyspark.rdd.RDD[~T], partitionFunc: Callable[[Iterable[~T]], Iterable[~U]],
+    # partitions: Optional[Sequence[int]] = None, allowLocal: bool = False) -> List[~U]:
+    #     pass
+
+    # def sequenceFile(self, path: str, keyClass: Optional[str] = None, valueClass: Optional[str] = None,
+    # keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, minSplits: Optional[int] = None,
+    # batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
+    #     pass
+
+    def setCheckpointDir(self, dirName: str) -> None:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def setJobDescription(self, value: str) -> None:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def setJobGroup(self, groupId: str, description: str, interruptOnCancel: bool = False) -> None:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def setLocalProperty(self, key: str, value: str) -> None:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def setLogLevel(self, logLevel: str) -> None:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def show_profiles(self) -> None:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    def sparkUser(self) -> str:  # noqa: D102
+        raise ContributionsAcceptedError
+
+    # def statusTracker(self) -> duckdb.experimental.spark.status.StatusTracker:
+    #     raise ContributionsAcceptedError
+
+    # def textFile(self, name: str, minPartitions: Optional[int] = None, use_unicode: bool = True
+    # ) -> pyspark.rdd.RDD[str]:
+    #     pass
+
+    # def union(self, rdds: List[pyspark.rdd.RDD[~T]]) -> pyspark.rdd.RDD[~T]:
+    #     pass
+
+    # def wholeTextFiles(self, path: str, minPartitions: Optional[int] = None, use_unicode: bool = True
+    # ) -> pyspark.rdd.RDD[typing.Tuple[str, str]]:
+    #     pass
+
+
+__all__ = ["SparkContext"]
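As the code above shows, only the constructor, the connection property, and stop() do real work; the remaining methods raise ContributionsAcceptedError or are commented out. A minimal sketch of the working surface:

    from duckdb.experimental.spark import SparkContext

    sc = SparkContext("local[*]")  # master string is accepted but not used
    con = sc.connection            # the underlying in-memory DuckDBPyConnection
    print(con.sql("SELECT 42 AS answer").fetchall())  # [(42,)]
    sc.stop()                      # closes the connection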