duckdb 1.4.1.dev113__cp39-cp39-macosx_10_9_universal2.whl → 1.5.0.dev37__cp39-cp39-macosx_10_9_universal2.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of duckdb might be problematic.
- _duckdb.cpython-39-darwin.so +0 -0
- duckdb/__init__.py +374 -373
- duckdb/__init__.pyi +180 -604
- duckdb/bytes_io_wrapper.py +7 -6
- duckdb/experimental/__init__.py +1 -2
- duckdb/experimental/spark/__init__.py +4 -3
- duckdb/experimental/spark/_globals.py +8 -8
- duckdb/experimental/spark/_typing.py +9 -7
- duckdb/experimental/spark/conf.py +15 -16
- duckdb/experimental/spark/context.py +44 -60
- duckdb/experimental/spark/errors/__init__.py +35 -33
- duckdb/experimental/spark/errors/error_classes.py +1 -1
- duckdb/experimental/spark/errors/exceptions/__init__.py +1 -1
- duckdb/experimental/spark/errors/exceptions/base.py +88 -39
- duckdb/experimental/spark/errors/utils.py +16 -11
- duckdb/experimental/spark/exception.py +6 -9
- duckdb/experimental/spark/sql/__init__.py +5 -5
- duckdb/experimental/spark/sql/_typing.py +15 -8
- duckdb/experimental/spark/sql/catalog.py +20 -21
- duckdb/experimental/spark/sql/column.py +54 -47
- duckdb/experimental/spark/sql/conf.py +8 -9
- duckdb/experimental/spark/sql/dataframe.py +233 -185
- duckdb/experimental/spark/sql/functions.py +1248 -1222
- duckdb/experimental/spark/sql/group.py +52 -56
- duckdb/experimental/spark/sql/readwriter.py +94 -80
- duckdb/experimental/spark/sql/session.py +59 -64
- duckdb/experimental/spark/sql/streaming.py +10 -9
- duckdb/experimental/spark/sql/type_utils.py +64 -66
- duckdb/experimental/spark/sql/types.py +344 -308
- duckdb/experimental/spark/sql/udf.py +6 -6
- duckdb/filesystem.py +8 -13
- duckdb/functional/__init__.py +16 -2
- duckdb/polars_io.py +57 -66
- duckdb/query_graph/__main__.py +96 -91
- duckdb/typing/__init__.py +8 -8
- duckdb/typing/__init__.pyi +2 -4
- duckdb/udf.py +5 -10
- duckdb/value/__init__.py +0 -1
- duckdb/value/constant/__init__.py +59 -61
- duckdb/value/constant/__init__.pyi +4 -3
- duckdb-1.5.0.dev37.dist-info/METADATA +80 -0
- duckdb-1.5.0.dev37.dist-info/RECORD +47 -0
- duckdb-1.4.1.dev113.dist-info/METADATA +0 -326
- duckdb-1.4.1.dev113.dist-info/RECORD +0 -47
- {duckdb-1.4.1.dev113.dist-info → duckdb-1.5.0.dev37.dist-info}/WHEEL +0 -0
- {duckdb-1.4.1.dev113.dist-info → duckdb-1.5.0.dev37.dist-info}/licenses/LICENSE +0 -0
duckdb/bytes_io_wrapper.py
CHANGED
@@ -1,5 +1,5 @@
-from io import StringIO, TextIOBase
-from typing import
+from io import StringIO, TextIOBase
+from typing import Union
 
 """
 BSD 3-Clause License
@@ -36,10 +36,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 """
 
 
-class BytesIOWrapper:
+class BytesIOWrapper:
     # Wrapper that wraps a StringIO buffer and reads bytes from it
     # Created for compat with pyarrow read_csv
-    def __init__(self, buffer: Union[StringIO, TextIOBase], encoding: str = "utf-8") -> None:
+    def __init__(self, buffer: Union[StringIO, TextIOBase], encoding: str = "utf-8") -> None:
         self.buffer = buffer
         self.encoding = encoding
         # Because a character can be represented by more than 1 byte,
@@ -48,10 +48,10 @@ class BytesIOWrapper: # noqa: D101
         # overflow to the front of the bytestring the next time reading is performed
         self.overflow = b""
 
-    def __getattr__(self, attr: str)
+    def __getattr__(self, attr: str):
         return getattr(self.buffer, attr)
 
-    def read(self, n: Union[int, None] = -1) -> bytes:
+    def read(self, n: Union[int, None] = -1) -> bytes:
         assert self.buffer is not None
         bytestring = self.buffer.read(n).encode(self.encoding)
         # When n=-1/n greater than remaining bytes: Read entire file/rest of file
@@ -63,3 +63,4 @@ class BytesIOWrapper: # noqa: D101
         to_return = combined_bytestring[:n]
         self.overflow = combined_bytestring[n:]
         return to_return
+
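A minimal usage sketch of the wrapper above, based only on the code visible in this diff (the duckdb.bytes_io_wrapper import path matches the file listed; the sample text is illustrative):

    from io import StringIO
    from duckdb.bytes_io_wrapper import BytesIOWrapper

    buf = StringIO("é,ü\ncol\n")                    # 8 characters, 10 bytes in UTF-8
    wrapped = BytesIOWrapper(buf, encoding="utf-8")
    first = wrapped.read(4)                          # reads 4 characters, returns only the first 4 bytes
    rest = wrapped.read()                            # rest of the buffer, including the byte overflow
    print(first + rest == "é,ü\ncol\n".encode("utf-8"))   # True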
duckdb/experimental/__init__.py
CHANGED
duckdb/experimental/spark/__init__.py
CHANGED
@@ -1,6 +1,7 @@
-from .
+from .sql import SparkSession, DataFrame
+from .conf import SparkConf
 from .context import SparkContext
+from ._globals import _NoValue
 from .exception import ContributionsAcceptedError
-from .sql import DataFrame, SparkSession
 
-__all__ = ["
+__all__ = ["SparkSession", "DataFrame", "SparkConf", "SparkContext", "ContributionsAcceptedError"]
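For orientation, a hedged sketch of how the re-exported entry points are typically used (assumes the PySpark-style builder and pandas-based createDataFrame that the DuckDB Spark API mimics; illustrative only, not part of this diff):

    import pandas as pd
    from duckdb.experimental.spark import SparkSession

    spark = SparkSession.builder.getOrCreate()      # DuckDB-backed, Spark-like session
    df = spark.createDataFrame(pd.DataFrame({"id": [1, 2], "name": ["a", "b"]}))
    df.show()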
duckdb/experimental/spark/_globals.py
CHANGED
@@ -15,7 +15,8 @@
 # limitations under the License.
 #
 
-"""
+"""
+Module defining global singleton classes.
 
 This module raises a RuntimeError if an attempt to reload it is made. In that
 way the identities of the classes defined here are fixed and will remain so
@@ -37,8 +38,7 @@ __ALL__ = ["_NoValue"]
 # Disallow reloading this module so as to preserve the identities of the
 # classes defined here.
 if "_is_loaded" in globals():
-
-    raise RuntimeError(msg)
+    raise RuntimeError("Reloading duckdb.experimental.spark._globals is not allowed")
 _is_loaded = True
 
 
@@ -54,23 +54,23 @@ class _NoValueType:
 
     __instance = None
 
-    def __new__(cls)
+    def __new__(cls):
         # ensure that only one instance exists
         if not cls.__instance:
-            cls.__instance = super().__new__(cls)
+            cls.__instance = super(_NoValueType, cls).__new__(cls)
         return cls.__instance
 
     # Make the _NoValue instance falsey
-    def __nonzero__(self)
+    def __nonzero__(self):
         return False
 
     __bool__ = __nonzero__
 
     # needed for python 2 to preserve identity through a pickle
-    def __reduce__(self)
+    def __reduce__(self):
         return (self.__class__, ())
 
-    def __repr__(self)
+    def __repr__(self):
         return "<no value>"
 
 
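The _NoValue sentinel defined above is re-exported by the spark package; a small illustrative sketch of the pattern (the set_option function below is hypothetical, not part of the package):

    from duckdb.experimental.spark._globals import _NoValue

    def set_option(name, value=_NoValue):
        # the sentinel distinguishes "argument omitted" from "explicitly passed None"
        if value is _NoValue:
            return f"{name}: keep current value"
        return f"{name}: set to {value!r}"

    print(set_option("answer"))         # argument omitted
    print(set_option("answer", None))   # explicitly cleared
    print(bool(_NoValue))               # False: the sentinel is falsy by design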
duckdb/experimental/spark/_typing.py
CHANGED
@@ -16,11 +16,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from
-from
+from typing import Callable, Iterable, Sized, TypeVar, Union
+from typing_extensions import Literal, Protocol
 
-from numpy import
-from typing_extensions import Literal, Protocol, Self
+from numpy import int32, int64, float32, float64, ndarray
 
 F = TypeVar("F", bound=Callable)
 T_co = TypeVar("T_co", covariant=True)
@@ -31,14 +30,17 @@ NonUDFType = Literal[0]
 
 
 class SupportsIAdd(Protocol):
-    def __iadd__(self, other: "SupportsIAdd") ->
+    def __iadd__(self, other: "SupportsIAdd") -> "SupportsIAdd":
+        ...
 
 
 class SupportsOrdering(Protocol):
-    def __lt__(self, other: "SupportsOrdering") -> bool:
+    def __lt__(self, other: "SupportsOrdering") -> bool:
+        ...
 
 
-class SizedIterable(Protocol, Sized, Iterable[T_co]):
+class SizedIterable(Protocol, Sized, Iterable[T_co]):
+    ...
 
 
 S = TypeVar("S", bound=SupportsOrdering)
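The protocols above are structural types; a self-contained sketch of how such a protocol is used (it mirrors SupportsOrdering but is defined locally so it runs stand-alone):

    from typing import TypeVar
    from typing_extensions import Protocol

    class SupportsOrdering(Protocol):
        def __lt__(self, other: "SupportsOrdering") -> bool:
            ...

    S = TypeVar("S", bound=SupportsOrdering)

    def smallest(a: S, b: S) -> S:
        # any object implementing __lt__ satisfies the protocol; no inheritance required
        return a if a < b else b

    print(smallest(3, 5))       # int defines __lt__
    print(smallest("b", "a"))   # str does too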
duckdb/experimental/spark/conf.py
CHANGED
@@ -1,45 +1,44 @@
-from typing import Optional
-
+from typing import Optional, List, Tuple
 from duckdb.experimental.spark.exception import ContributionsAcceptedError
 
 
-class SparkConf:
-    def __init__(self)
+class SparkConf:
+    def __init__(self):
         raise NotImplementedError
 
-    def contains(self, key: str) -> bool:
+    def contains(self, key: str) -> bool:
         raise ContributionsAcceptedError
 
-    def get(self, key: str, defaultValue: Optional[str] = None) -> Optional[str]:
+    def get(self, key: str, defaultValue: Optional[str] = None) -> Optional[str]:
         raise ContributionsAcceptedError
 
-    def getAll(self) ->
+    def getAll(self) -> List[Tuple[str, str]]:
         raise ContributionsAcceptedError
 
-    def set(self, key: str, value: str) -> "SparkConf":
+    def set(self, key: str, value: str) -> "SparkConf":
         raise ContributionsAcceptedError
 
-    def setAll(self, pairs:
+    def setAll(self, pairs: List[Tuple[str, str]]) -> "SparkConf":
         raise ContributionsAcceptedError
 
-    def setAppName(self, value: str) -> "SparkConf":
+    def setAppName(self, value: str) -> "SparkConf":
         raise ContributionsAcceptedError
 
-    def setExecutorEnv(
-        self, key: Optional[str] = None, value: Optional[str] = None, pairs: Optional[
+    def setExecutorEnv(
+        self, key: Optional[str] = None, value: Optional[str] = None, pairs: Optional[List[Tuple[str, str]]] = None
     ) -> "SparkConf":
         raise ContributionsAcceptedError
 
-    def setIfMissing(self, key: str, value: str) -> "SparkConf":
+    def setIfMissing(self, key: str, value: str) -> "SparkConf":
         raise ContributionsAcceptedError
 
-    def setMaster(self, value: str) -> "SparkConf":
+    def setMaster(self, value: str) -> "SparkConf":
         raise ContributionsAcceptedError
 
-    def setSparkHome(self, value: str) -> "SparkConf":
+    def setSparkHome(self, value: str) -> "SparkConf":
         raise ContributionsAcceptedError
 
-    def toDebugString(self) -> str:
+    def toDebugString(self) -> str:
         raise ContributionsAcceptedError
 
 
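SparkConf stays a stub in both versions; a short sketch of what touching it does, grounded in the __init__ shown above:

    from duckdb.experimental.spark.conf import SparkConf

    try:
        SparkConf()                     # __init__ raises NotImplementedError
    except NotImplementedError:
        print("SparkConf is currently a placeholder in the DuckDB Spark API")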
duckdb/experimental/spark/context.py
CHANGED
@@ -1,42 +1,42 @@
-from typing import Optional
-
+from typing import Optional
 import duckdb
 from duckdb import DuckDBPyConnection
-
+
 from duckdb.experimental.spark.exception import ContributionsAcceptedError
+from duckdb.experimental.spark.conf import SparkConf
 
 
-class SparkContext:
-    def __init__(self, master: str)
-        self._connection = duckdb.connect(
+class SparkContext:
+    def __init__(self, master: str):
+        self._connection = duckdb.connect(':memory:')
         # This aligns the null ordering with Spark.
         self._connection.execute("set default_null_order='nulls_first_on_asc_last_on_desc'")
 
     @property
-    def connection(self) -> DuckDBPyConnection:
+    def connection(self) -> DuckDBPyConnection:
         return self._connection
 
-    def stop(self) -> None:
+    def stop(self) -> None:
         self._connection.close()
 
     @classmethod
-    def getOrCreate(cls, conf: Optional[SparkConf] = None) -> "SparkContext":
+    def getOrCreate(cls, conf: Optional[SparkConf] = None) -> "SparkContext":
         raise ContributionsAcceptedError
 
     @classmethod
-    def setSystemProperty(cls, key: str, value: str) -> None:
+    def setSystemProperty(cls, key: str, value: str) -> None:
         raise ContributionsAcceptedError
 
     @property
-    def applicationId(self) -> str:
+    def applicationId(self) -> str:
         raise ContributionsAcceptedError
 
     @property
-    def defaultMinPartitions(self) -> int:
+    def defaultMinPartitions(self) -> int:
         raise ContributionsAcceptedError
 
     @property
-    def defaultParallelism(self) -> int:
+    def defaultParallelism(self) -> int:
         raise ContributionsAcceptedError
 
     # @property
@@ -44,35 +44,33 @@ class SparkContext: # noqa: D101
     # raise ContributionsAcceptedError
 
     @property
-    def startTime(self) -> str:
+    def startTime(self) -> str:
         raise ContributionsAcceptedError
 
     @property
-    def uiWebUrl(self) -> str:
+    def uiWebUrl(self) -> str:
         raise ContributionsAcceptedError
 
     @property
-    def version(self) -> str:
+    def version(self) -> str:
         raise ContributionsAcceptedError
 
-    def __repr__(self) -> str:
+    def __repr__(self) -> str:
         raise ContributionsAcceptedError
 
-    # def accumulator(self, value: ~T, accum_param: Optional[ForwardRef('AccumulatorParam[T]')] = None
-    # ) -> 'Accumulator[T]':
+    # def accumulator(self, value: ~T, accum_param: Optional[ForwardRef('AccumulatorParam[T]')] = None) -> 'Accumulator[T]':
    # pass
 
-    def addArchive(self, path: str) -> None:
+    def addArchive(self, path: str) -> None:
         raise ContributionsAcceptedError
 
-    def addFile(self, path: str, recursive: bool = False) -> None:
+    def addFile(self, path: str, recursive: bool = False) -> None:
         raise ContributionsAcceptedError
 
-    def addPyFile(self, path: str) -> None:
+    def addPyFile(self, path: str) -> None:
         raise ContributionsAcceptedError
 
-    # def binaryFiles(self, path: str, minPartitions: Optional[int] = None
-    # ) -> duckdb.experimental.spark.rdd.RDD[typing.Tuple[str, bytes]]:
+    # def binaryFiles(self, path: str, minPartitions: Optional[int] = None) -> duckdb.experimental.spark.rdd.RDD[typing.Tuple[str, bytes]]:
    # pass
 
    # def binaryRecords(self, path: str, recordLength: int) -> duckdb.experimental.spark.rdd.RDD[bytes]:
@@ -81,45 +79,37 @@ class SparkContext: # noqa: D101
    # def broadcast(self, value: ~T) -> 'Broadcast[T]':
    # pass
 
-    def cancelAllJobs(self) -> None:
+    def cancelAllJobs(self) -> None:
         raise ContributionsAcceptedError
 
-    def cancelJobGroup(self, groupId: str) -> None:
+    def cancelJobGroup(self, groupId: str) -> None:
         raise ContributionsAcceptedError
 
-    def dump_profiles(self, path: str) -> None:
+    def dump_profiles(self, path: str) -> None:
         raise ContributionsAcceptedError
 
    # def emptyRDD(self) -> duckdb.experimental.spark.rdd.RDD[typing.Any]:
    # pass
 
-    def getCheckpointDir(self) -> Optional[str]:
+    def getCheckpointDir(self) -> Optional[str]:
         raise ContributionsAcceptedError
 
-    def getConf(self) -> SparkConf:
+    def getConf(self) -> SparkConf:
         raise ContributionsAcceptedError
 
-    def getLocalProperty(self, key: str) -> Optional[str]:
+    def getLocalProperty(self, key: str) -> Optional[str]:
         raise ContributionsAcceptedError
 
-    # def hadoopFile(self, path: str, inputFormatClass: str, keyClass: str, valueClass: str,
-    # keyConverter: Optional[str] = None, valueConverter: Optional[str] = None,
-    # conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
+    # def hadoopFile(self, path: str, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
    # pass
 
-    # def hadoopRDD(self, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None,
-    # valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0
-    # ) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
+    # def hadoopRDD(self, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
    # pass
 
-    # def newAPIHadoopFile(self, path: str, inputFormatClass: str, keyClass: str, valueClass: str,
-    # keyConverter: Optional[str] = None, valueConverter: Optional[str] = None,
-    # conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
+    # def newAPIHadoopFile(self, path: str, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
    # pass
 
-    # def newAPIHadoopRDD(self, inputFormatClass: str, keyClass: str, valueClass: str,
-    # keyConverter: Optional[str] = None, valueConverter: Optional[str] = None,
-    # conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
+    # def newAPIHadoopRDD(self, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
    # pass
 
    # def parallelize(self, c: Iterable[~T], numSlices: Optional[int] = None) -> pyspark.rdd.RDD[~T]:
@@ -128,52 +118,46 @@ class SparkContext: # noqa: D101
    # def pickleFile(self, name: str, minPartitions: Optional[int] = None) -> pyspark.rdd.RDD[typing.Any]:
    # pass
 
-    # def range(self, start: int, end: Optional[int] = None, step: int = 1, numSlices: Optional[int] = None
-    # ) -> pyspark.rdd.RDD[int]:
+    # def range(self, start: int, end: Optional[int] = None, step: int = 1, numSlices: Optional[int] = None) -> pyspark.rdd.RDD[int]:
    # pass
 
-    # def runJob(self, rdd: pyspark.rdd.RDD[~T], partitionFunc: Callable[[Iterable[~T]], Iterable[~U]],
-    # partitions: Optional[Sequence[int]] = None, allowLocal: bool = False) -> List[~U]:
+    # def runJob(self, rdd: pyspark.rdd.RDD[~T], partitionFunc: Callable[[Iterable[~T]], Iterable[~U]], partitions: Optional[Sequence[int]] = None, allowLocal: bool = False) -> List[~U]:
    # pass
 
-    # def sequenceFile(self, path: str, keyClass: Optional[str] = None, valueClass: Optional[str] = None,
-    # keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, minSplits: Optional[int] = None,
-    # batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
+    # def sequenceFile(self, path: str, keyClass: Optional[str] = None, valueClass: Optional[str] = None, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, minSplits: Optional[int] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
    # pass
 
-    def setCheckpointDir(self, dirName: str) -> None:
+    def setCheckpointDir(self, dirName: str) -> None:
         raise ContributionsAcceptedError
 
-    def setJobDescription(self, value: str) -> None:
+    def setJobDescription(self, value: str) -> None:
         raise ContributionsAcceptedError
 
-    def setJobGroup(self, groupId: str, description: str, interruptOnCancel: bool = False) -> None:
+    def setJobGroup(self, groupId: str, description: str, interruptOnCancel: bool = False) -> None:
         raise ContributionsAcceptedError
 
-    def setLocalProperty(self, key: str, value: str) -> None:
+    def setLocalProperty(self, key: str, value: str) -> None:
         raise ContributionsAcceptedError
 
-    def setLogLevel(self, logLevel: str) -> None:
+    def setLogLevel(self, logLevel: str) -> None:
         raise ContributionsAcceptedError
 
-    def show_profiles(self) -> None:
+    def show_profiles(self) -> None:
         raise ContributionsAcceptedError
 
-    def sparkUser(self) -> str:
+    def sparkUser(self) -> str:
         raise ContributionsAcceptedError
 
    # def statusTracker(self) -> duckdb.experimental.spark.status.StatusTracker:
    # raise ContributionsAcceptedError
 
-    # def textFile(self, name: str, minPartitions: Optional[int] = None, use_unicode: bool = True
-    # ) -> pyspark.rdd.RDD[str]:
+    # def textFile(self, name: str, minPartitions: Optional[int] = None, use_unicode: bool = True) -> pyspark.rdd.RDD[str]:
    # pass
 
    # def union(self, rdds: List[pyspark.rdd.RDD[~T]]) -> pyspark.rdd.RDD[~T]:
    # pass
 
-    # def wholeTextFiles(self, path: str, minPartitions: Optional[int] = None, use_unicode: bool = True
-    # ) -> pyspark.rdd.RDD[typing.Tuple[str, str]]:
+    # def wholeTextFiles(self, path: str, minPartitions: Optional[int] = None, use_unicode: bool = True) -> pyspark.rdd.RDD[typing.Tuple[str, str]]:
    # pass
 
 
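A short sketch of the constructor path shown above: SparkContext wraps an in-memory DuckDB connection and aligns NULL ordering with Spark (the query below is illustrative):

    from duckdb.experimental.spark.context import SparkContext

    sc = SparkContext("local")                        # master string accepted for PySpark compatibility
    rel = sc.connection.sql("SELECT 42 AS answer")    # .connection exposes the underlying DuckDBPyConnection
    print(rel.fetchall())                             # [(42,)]
    sc.stop()                                         # closes the connection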
duckdb/experimental/spark/errors/__init__.py
CHANGED
@@ -15,56 +15,58 @@
 # limitations under the License.
 #
 
-"""
-
-
+"""
+PySpark exceptions.
+"""
+from .exceptions.base import ( # noqa: F401
+    PySparkException,
     AnalysisException,
+    TempTableAlreadyExistsException,
+    ParseException,
+    IllegalArgumentException,
     ArithmeticException,
+    UnsupportedOperationException,
     ArrayIndexOutOfBoundsException,
     DateTimeException,
-    IllegalArgumentException,
     NumberFormatException,
-
-    PySparkAssertionError,
-    PySparkAttributeError,
-    PySparkException,
-    PySparkIndexError,
-    PySparkNotImplementedError,
-    PySparkRuntimeError,
-    PySparkTypeError,
-    PySparkValueError,
-    PythonException,
+    StreamingQueryException,
     QueryExecutionException,
+    PythonException,
+    UnknownException,
     SparkRuntimeException,
     SparkUpgradeException,
-
-
-
-
+    PySparkTypeError,
+    PySparkValueError,
+    PySparkIndexError,
+    PySparkAttributeError,
+    PySparkRuntimeError,
+    PySparkAssertionError,
+    PySparkNotImplementedError,
 )
 
+
 __all__ = [
+    "PySparkException",
     "AnalysisException",
+    "TempTableAlreadyExistsException",
+    "ParseException",
+    "IllegalArgumentException",
     "ArithmeticException",
+    "UnsupportedOperationException",
     "ArrayIndexOutOfBoundsException",
     "DateTimeException",
-    "IllegalArgumentException",
     "NumberFormatException",
-    "
-    "PySparkAssertionError",
-    "PySparkAttributeError",
-    "PySparkException",
-    "PySparkIndexError",
-    "PySparkNotImplementedError",
-    "PySparkRuntimeError",
-    "PySparkTypeError",
-    "PySparkValueError",
-    "PythonException",
+    "StreamingQueryException",
     "QueryExecutionException",
+    "PythonException",
+    "UnknownException",
     "SparkRuntimeException",
     "SparkUpgradeException",
-    "
-    "
-    "
-    "
+    "PySparkTypeError",
+    "PySparkValueError",
+    "PySparkIndexError",
+    "PySparkAttributeError",
+    "PySparkRuntimeError",
+    "PySparkAssertionError",
+    "PySparkNotImplementedError",
 ]