duckdb 0.8.2.dev3007-cp311-cp311-win_amd64.whl → 1.4.3.dev8-cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _duckdb-stubs/__init__.pyi +1478 -0
- _duckdb-stubs/_func.pyi +46 -0
- _duckdb-stubs/_sqltypes.pyi +75 -0
- duckdb/duckdb.cp311-win_amd64.pyd → _duckdb.cp311-win_amd64.pyd +0 -0
- adbc_driver_duckdb/__init__.py +10 -8
- adbc_driver_duckdb/dbapi.py +4 -5
- duckdb/__init__.py +250 -196
- duckdb/_dbapi_type_object.py +231 -0
- duckdb/_version.py +22 -0
- {pyduckdb → duckdb}/bytes_io_wrapper.py +12 -8
- duckdb/experimental/__init__.py +5 -0
- duckdb/experimental/spark/__init__.py +6 -0
- {pyduckdb → duckdb/experimental}/spark/_globals.py +8 -8
- duckdb/experimental/spark/_typing.py +46 -0
- duckdb/experimental/spark/conf.py +46 -0
- duckdb/experimental/spark/context.py +180 -0
- duckdb/experimental/spark/errors/__init__.py +70 -0
- duckdb/experimental/spark/errors/error_classes.py +918 -0
- duckdb/experimental/spark/errors/exceptions/__init__.py +16 -0
- duckdb/experimental/spark/errors/exceptions/base.py +168 -0
- duckdb/experimental/spark/errors/utils.py +111 -0
- duckdb/experimental/spark/exception.py +18 -0
- {pyduckdb → duckdb/experimental}/spark/sql/__init__.py +5 -5
- duckdb/experimental/spark/sql/_typing.py +86 -0
- duckdb/experimental/spark/sql/catalog.py +79 -0
- duckdb/experimental/spark/sql/column.py +361 -0
- duckdb/experimental/spark/sql/conf.py +24 -0
- duckdb/experimental/spark/sql/dataframe.py +1389 -0
- duckdb/experimental/spark/sql/functions.py +6195 -0
- duckdb/experimental/spark/sql/group.py +424 -0
- duckdb/experimental/spark/sql/readwriter.py +435 -0
- duckdb/experimental/spark/sql/session.py +297 -0
- duckdb/experimental/spark/sql/streaming.py +36 -0
- duckdb/experimental/spark/sql/type_utils.py +107 -0
- {pyduckdb → duckdb/experimental}/spark/sql/types.py +323 -342
- duckdb/experimental/spark/sql/udf.py +37 -0
- duckdb/filesystem.py +33 -0
- duckdb/func/__init__.py +3 -0
- duckdb/functional/__init__.py +12 -16
- duckdb/polars_io.py +284 -0
- duckdb/py.typed +0 -0
- duckdb/query_graph/__main__.py +358 -0
- duckdb/sqltypes/__init__.py +63 -0
- duckdb/typing/__init__.py +18 -6
- {pyduckdb → duckdb}/udf.py +10 -5
- duckdb/value/__init__.py +1 -0
- pyduckdb/value/constant.py → duckdb/value/constant/__init__.py +66 -57
- duckdb-1.4.3.dev8.dist-info/METADATA +88 -0
- duckdb-1.4.3.dev8.dist-info/RECORD +52 -0
- {duckdb-0.8.2.dev3007.dist-info → duckdb-1.4.3.dev8.dist-info}/WHEEL +1 -1
- duckdb-1.4.3.dev8.dist-info/licenses/LICENSE +7 -0
- duckdb-0.8.2.dev3007.dist-info/METADATA +0 -20
- duckdb-0.8.2.dev3007.dist-info/RECORD +0 -34
- duckdb-0.8.2.dev3007.dist-info/top_level.txt +0 -4
- duckdb-stubs/__init__.pyi +0 -574
- duckdb-stubs/functional/__init__.pyi +0 -33
- duckdb-stubs/typing/__init__.pyi +0 -35
- pyduckdb/__init__.py +0 -61
- pyduckdb/filesystem.py +0 -64
- pyduckdb/spark/__init__.py +0 -7
- pyduckdb/spark/conf.py +0 -45
- pyduckdb/spark/context.py +0 -162
- pyduckdb/spark/exception.py +0 -9
- pyduckdb/spark/sql/catalog.py +0 -78
- pyduckdb/spark/sql/conf.py +0 -23
- pyduckdb/spark/sql/dataframe.py +0 -75
- pyduckdb/spark/sql/readwriter.py +0 -180
- pyduckdb/spark/sql/session.py +0 -249
- pyduckdb/spark/sql/streaming.py +0 -37
- pyduckdb/spark/sql/type_utils.py +0 -104
- pyduckdb/spark/sql/udf.py +0 -9
- {pyduckdb → duckdb/experimental}/spark/LICENSE +0 -0
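Taken together, the renames above fold the old top-level pyduckdb package into duckdb itself: the Spark compatibility layer moves under duckdb.experimental.spark, the typed value wrappers move to duckdb.value.constant, and the type stubs move from duckdb-stubs to _duckdb-stubs. A minimal before/after import sketch; the exported names are assumed from the moved files rather than verified against the 1.4.3.dev8 wheel:

    # Old layout (0.8.2.dev3007)
    # from pyduckdb.spark.sql import SparkSession, DataFrame
    # from pyduckdb.value.constant import IntegerValue

    # New layout (1.4.3.dev8), assumed equivalents based on the renames above
    from duckdb.experimental.spark.sql import SparkSession, DataFrame
    from duckdb.value.constant import IntegerValue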
pyduckdb/__init__.py
DELETED
@@ -1,61 +0,0 @@
-from .value.constant import (
-    Value,
-    NullValue,
-    BooleanValue,
-    UnsignedBinaryValue,
-    UnsignedShortValue,
-    UnsignedIntegerValue,
-    UnsignedLongValue,
-    BinaryValue,
-    ShortValue,
-    IntegerValue,
-    LongValue,
-    HugeIntegerValue,
-    FloatValue,
-    DoubleValue,
-    DecimalValue,
-    StringValue,
-    UUIDValue,
-    BitValue,
-    BlobValue,
-    DateValue,
-    IntervalValue,
-    TimestampValue,
-    TimestampSecondValue,
-    TimestampMilisecondValue,
-    TimestampNanosecondValue,
-    TimestampTimeZoneValue,
-    TimeValue,
-    TimeTimeZoneValue,
-)
-
-__all__ = [
-    "Value",
-    "NullValue",
-    "BooleanValue",
-    "UnsignedBinaryValue",
-    "UnsignedShortValue",
-    "UnsignedIntegerValue",
-    "UnsignedLongValue",
-    "BinaryValue",
-    "ShortValue",
-    "IntegerValue",
-    "LongValue",
-    "HugeIntegerValue",
-    "FloatValue",
-    "DoubleValue",
-    "DecimalValue",
-    "StringValue",
-    "UUIDValue",
-    "BitValue",
-    "BlobValue",
-    "DateValue",
-    "IntervalValue",
-    "TimestampValue",
-    "TimestampSecondValue",
-    "TimestampMilisecondValue",
-    "TimestampNanosecondValue",
-    "TimestampTimeZoneValue",
-    "TimeValue",
-    "TimeTimeZoneValue",
-]
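The value wrappers themselves are not gone; per the listing, pyduckdb/value/constant.py became duckdb/value/constant/__init__.py. A short construction sketch under the new path, assuming the wrapper constructors still take a single Python object as in the old module:

    from duckdb.value.constant import IntegerValue, StringValue

    # Wrap plain Python objects with an explicit DuckDB type; such values can
    # then be used where a typed constant is expected (e.g. prepared-statement
    # parameters). Behaviour is assumed to match the pre-1.0 wrappers.
    answer = IntegerValue(42)
    name = StringValue("duck")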
pyduckdb/filesystem.py
DELETED
@@ -1,64 +0,0 @@
-from fsspec import filesystem, AbstractFileSystem
-from fsspec.implementations.memory import MemoryFileSystem
-from shutil import copyfileobj
-from .bytes_io_wrapper import BytesIOWrapper
-from io import TextIOBase
-
-
-def is_file_like(obj):
-    # We only care that we can read from the file
-    return hasattr(obj, "read") and hasattr(obj, "seek")
-
-
-class ModifiedMemoryFileSystem(MemoryFileSystem):
-    protocol = ('DUCKDB_INTERNAL_OBJECTSTORE',)
-    # defer to the original implementation that doesn't hardcode the protocol
-    _strip_protocol = classmethod(AbstractFileSystem._strip_protocol.__func__)
-
-    # Add this manually because it's apparently missing on windows???
-    def unstrip_protocol(self, name):
-        """Format FS-specific path to generic, including protocol"""
-        protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol
-        for protocol in protos:
-            if name.startswith(f"{protocol}://"):
-                return name
-        return f"{protos[0]}://{name}"
-
-    def info(self, path, **kwargs):
-        path = self._strip_protocol(path)
-        if path in self.store:
-            filelike = self.store[path]
-            return {
-                "name": path,
-                "size": getattr(filelike, "size", 0),
-                "type": "file",
-                "created": getattr(filelike, "created", None),
-            }
-        else:
-            raise FileNotFoundError(path)
-
-    def _open(
-        self,
-        path,
-        mode="rb",
-        block_size=None,
-        autocommit=True,
-        cache_options=None,
-        **kwargs,
-    ):
-        path = self._strip_protocol(path)
-        if path in self.store:
-            f = self.store[path]
-            return f
-        else:
-            raise FileNotFoundError(path)
-
-    def add_file(self, object, path):
-        if not is_file_like(object):
-            raise ValueError("Can not read from a non file-like object")
-        path = self._strip_protocol(path)
-        if isinstance(object, TextIOBase):
-            # Wrap this so that we can return a bytes object from 'read'
-            self.store[path] = BytesIOWrapper(object)
-        else:
-            self.store[path] = object
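This internal in-memory filesystem is what backs reading from Python file-like objects, and its replacement now lives in duckdb/filesystem.py per the listing. The public-facing effect is simply that a file-like object can be handed to the reader functions; a minimal sketch, assuming a recent duckdb build:

    import io
    import duckdb

    # DuckDB registers the buffer in its internal object store and reads it
    # back through the fsspec-style filesystem shown above.
    buf = io.StringIO("a,b\n1,2\n3,4\n")
    print(duckdb.read_csv(buf).fetchall())  # -> [(1, 2), (3, 4)]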
pyduckdb/spark/__init__.py
DELETED
@@ -1,7 +0,0 @@
-from .sql import SparkSession, DataFrame
-from .conf import SparkConf
-from .context import SparkContext
-from ._globals import _NoValue
-from .exception import ContributionsAcceptedError
-
-__all__ = ["SparkSession", "DataFrame", "SparkConf", "SparkContext", "ContributionsAcceptedError"]
pyduckdb/spark/conf.py
DELETED
@@ -1,45 +0,0 @@
-from typing import TYPE_CHECKING, Optional, List, Tuple
-from pyduckdb.spark.exception import ContributionsAcceptedError
-
-
-class SparkConf:
-    def __init__(self):
-        raise NotImplementedError
-
-    def contains(self, key: str) -> bool:
-        raise ContributionsAcceptedError
-
-    def get(self, key: str, defaultValue: Optional[str] = None) -> Optional[str]:
-        raise ContributionsAcceptedError
-
-    def getAll(self) -> List[Tuple[str, str]]:
-        raise ContributionsAcceptedError
-
-    def set(self, key: str, value: str) -> "SparkConf":
-        raise ContributionsAcceptedError
-
-    def setAll(self, pairs: List[Tuple[str, str]]) -> "SparkConf":
-        raise ContributionsAcceptedError
-
-    def setAppName(self, value: str) -> "SparkConf":
-        raise ContributionsAcceptedError
-
-    def setExecutorEnv(
-        self, key: Optional[str] = None, value: Optional[str] = None, pairs: Optional[List[Tuple[str, str]]] = None
-    ) -> "SparkConf":
-        raise ContributionsAcceptedError
-
-    def setIfMissing(self, key: str, value: str) -> "SparkConf":
-        raise ContributionsAcceptedError
-
-    def setMaster(self, value: str) -> "SparkConf":
-        raise ContributionsAcceptedError
-
-    def setSparkHome(self, value: str) -> "SparkConf":
-        raise ContributionsAcceptedError
-
-    def toDebugString(self) -> str:
-        raise ContributionsAcceptedError
-
-
-__all__ = ["SparkConf"]
pyduckdb/spark/context.py
DELETED
@@ -1,162 +0,0 @@
-from typing import Optional
-import duckdb
-from duckdb import DuckDBPyConnection
-
-from pyduckdb.spark.exception import ContributionsAcceptedError
-from pyduckdb.spark.conf import SparkConf
-
-
-class SparkContext:
-    def __init__(self, master: str):
-        self._connection = duckdb.connect(master)
-
-    @property
-    def connection(self) -> DuckDBPyConnection:
-        return self._connection
-
-    def stop(self) -> None:
-        self._connection.close()
-
-    @classmethod
-    def getOrCreate(cls, conf: Optional[SparkConf] = None) -> "SparkContext":
-        raise ContributionsAcceptedError
-
-    @classmethod
-    def setSystemProperty(cls, key: str, value: str) -> None:
-        raise ContributionsAcceptedError
-
-    @property
-    def applicationId(self) -> str:
-        raise ContributionsAcceptedError
-
-    @property
-    def defaultMinPartitions(self) -> int:
-        raise ContributionsAcceptedError
-
-    @property
-    def defaultParallelism(self) -> int:
-        raise ContributionsAcceptedError
-
-    # @property
-    # def resources(self) -> Dict[str, ResourceInformation]:
-    #     raise ContributionsAcceptedError
-
-    @property
-    def startTime(self) -> str:
-        raise ContributionsAcceptedError
-
-    @property
-    def uiWebUrl(self) -> str:
-        raise ContributionsAcceptedError
-
-    @property
-    def version(self) -> str:
-        raise ContributionsAcceptedError
-
-    def __repr__(self) -> str:
-        raise ContributionsAcceptedError
-
-    # def accumulator(self, value: ~T, accum_param: Optional[ForwardRef('AccumulatorParam[T]')] = None) -> 'Accumulator[T]':
-    #     pass
-
-    def addArchive(self, path: str) -> None:
-        raise ContributionsAcceptedError
-
-    def addFile(self, path: str, recursive: bool = False) -> None:
-        raise ContributionsAcceptedError
-
-    def addPyFile(self, path: str) -> None:
-        raise ContributionsAcceptedError
-
-    # def binaryFiles(self, path: str, minPartitions: Optional[int] = None) -> pyduckdb.spark.rdd.RDD[typing.Tuple[str, bytes]]:
-    #     pass
-
-    # def binaryRecords(self, path: str, recordLength: int) -> pyduckdb.spark.rdd.RDD[bytes]:
-    #     pass
-
-    # def broadcast(self, value: ~T) -> 'Broadcast[T]':
-    #     pass
-
-    def cancelAllJobs(self) -> None:
-        raise ContributionsAcceptedError
-
-    def cancelJobGroup(self, groupId: str) -> None:
-        raise ContributionsAcceptedError
-
-    def dump_profiles(self, path: str) -> None:
-        raise ContributionsAcceptedError
-
-    # def emptyRDD(self) -> pyduckdb.spark.rdd.RDD[typing.Any]:
-    #     pass
-
-    def getCheckpointDir(self) -> Optional[str]:
-        raise ContributionsAcceptedError
-
-    def getConf(self) -> SparkConf:
-        raise ContributionsAcceptedError
-
-    def getLocalProperty(self, key: str) -> Optional[str]:
-        raise ContributionsAcceptedError
-
-    # def hadoopFile(self, path: str, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
-    #     pass
-
-    # def hadoopRDD(self, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
-    #     pass
-
-    # def newAPIHadoopFile(self, path: str, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
-    #     pass
-
-    # def newAPIHadoopRDD(self, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
-    #     pass
-
-    # def parallelize(self, c: Iterable[~T], numSlices: Optional[int] = None) -> pyspark.rdd.RDD[~T]:
-    #     pass
-
-    # def pickleFile(self, name: str, minPartitions: Optional[int] = None) -> pyspark.rdd.RDD[typing.Any]:
-    #     pass
-
-    # def range(self, start: int, end: Optional[int] = None, step: int = 1, numSlices: Optional[int] = None) -> pyspark.rdd.RDD[int]:
-    #     pass
-
-    # def runJob(self, rdd: pyspark.rdd.RDD[~T], partitionFunc: Callable[[Iterable[~T]], Iterable[~U]], partitions: Optional[Sequence[int]] = None, allowLocal: bool = False) -> List[~U]:
-    #     pass
-
-    # def sequenceFile(self, path: str, keyClass: Optional[str] = None, valueClass: Optional[str] = None, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, minSplits: Optional[int] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
-    #     pass
-
-    def setCheckpointDir(self, dirName: str) -> None:
-        raise ContributionsAcceptedError
-
-    def setJobDescription(self, value: str) -> None:
-        raise ContributionsAcceptedError
-
-    def setJobGroup(self, groupId: str, description: str, interruptOnCancel: bool = False) -> None:
-        raise ContributionsAcceptedError
-
-    def setLocalProperty(self, key: str, value: str) -> None:
-        raise ContributionsAcceptedError
-
-    def setLogLevel(self, logLevel: str) -> None:
-        raise ContributionsAcceptedError
-
-    def show_profiles(self) -> None:
-        raise ContributionsAcceptedError
-
-    def sparkUser(self) -> str:
-        raise ContributionsAcceptedError
-
-    # def statusTracker(self) -> pyduckdb.spark.status.StatusTracker:
-    #     raise ContributionsAcceptedError
-
-    # def textFile(self, name: str, minPartitions: Optional[int] = None, use_unicode: bool = True) -> pyspark.rdd.RDD[str]:
-    #     pass
-
-    # def union(self, rdds: List[pyspark.rdd.RDD[~T]]) -> pyspark.rdd.RDD[~T]:
-    #     pass
-
-    # def wholeTextFiles(self, path: str, minPartitions: Optional[int] = None, use_unicode: bool = True) -> pyspark.rdd.RDD[typing.Tuple[str, str]]:
-    #     pass
-
-
-__all__ = ["SparkContext"]
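Nearly everything in the old SparkContext was a stub; the only real behaviour was wrapping a DuckDB connection. The equivalent direct usage, unchanged across both versions:

    import duckdb

    # SparkContext(master) amounted to duckdb.connect(master), with stop() closing it.
    con = duckdb.connect(":memory:")
    print(con.sql("select 42 as answer").fetchall())
    con.close()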
pyduckdb/spark/exception.py
DELETED
@@ -1,9 +0,0 @@
-class ContributionsAcceptedError(NotImplementedError):
-    """
-    This method is not planned to be implemented, if you would like to implement this method
-    or show your interest in this method to other members of the community,
-    feel free to open up a PR or a Discussion over on https://github.com/duckdb/duckdb
-    """
-
-
-__all__ = ["ContributionsAcceptedError"]
pyduckdb/spark/sql/catalog.py
DELETED
@@ -1,78 +0,0 @@
-from typing import List, NamedTuple, Optional
-from pyduckdb.spark.sql.session import SparkSession
-
-
-class Database(NamedTuple):
-    name: str
-    description: Optional[str]
-    locationUri: str
-
-
-class Table(NamedTuple):
-    name: str
-    database: Optional[str]
-    description: Optional[str]
-    tableType: str
-    isTemporary: bool
-
-
-class Column(NamedTuple):
-    name: str
-    description: Optional[str]
-    dataType: str
-    nullable: bool
-    isPartition: bool
-    isBucket: bool
-
-
-class Function(NamedTuple):
-    name: str
-    description: Optional[str]
-    className: str
-    isTemporary: bool
-
-
-class Catalog:
-    def __init__(self, session: SparkSession):
-        self._session = session
-
-    def listDatabases(self) -> List[Database]:
-        res = self._session.conn.sql('select * from duckdb_databases()').fetchall()
-
-        def transform_to_database(x) -> Database:
-            return Database(name=x[0], description=None, locationUri='')
-
-        databases = [transform_to_database(x) for x in res]
-        return databases
-
-    def listTables(self) -> List[Table]:
-        res = self._session.conn.sql('select * from duckdb_tables()').fetchall()
-
-        def transform_to_table(x) -> Table:
-            return Table(name=x[4], database=x[0], description=x[13], tableType='', isTemporary=x[7])
-
-        tables = [transform_to_table(x) for x in res]
-        return tables
-
-    def listColumns(self, tableName: str, dbName: Optional[str] = None) -> List[Column]:
-        query = f"""
-            select * from duckdb_columns() where table_name = '{tableName}'
-        """
-        if dbName:
-            query += f" and database_name = '{dbName}'"
-        res = self._session.conn.sql(query).fetchall()
-
-        def transform_to_column(x) -> Column:
-            return Column(name=x[6], description=None, dataType=x[11], nullable=x[8], isPartition=False, isBucket=False)
-
-        columns = [transform_to_column(x) for x in res]
-        return columns
-
-    def listFunctions(self, dbName: Optional[str] = None) -> List[Function]:
-        raise NotImplementedError
-
-    def setCurrentDatabase(self, dbName: str) -> None:
-        raise NotImplementedError
-
-
-__all__ = ["Catalog", "Table", "Column", "Function", "Database"]
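The catalog methods above are thin projections over DuckDB's own metadata table functions, and those functions remain the stable way to get the same information directly. For example:

    import duckdb

    con = duckdb.connect()
    con.sql("create table t (a integer, b varchar)")

    # The same metadata listTables()/listColumns() read, queried directly.
    print(con.sql("select database_name, table_name from duckdb_tables()").fetchall())
    print(con.sql("select column_name, data_type from duckdb_columns() where table_name = 't'").fetchall())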
pyduckdb/spark/sql/conf.py
DELETED
@@ -1,23 +0,0 @@
-from typing import Optional, Union
-from pyduckdb.spark._globals import _NoValueType, _NoValue
-from duckdb import DuckDBPyConnection
-
-
-class RuntimeConfig:
-    def __init__(self, connection: DuckDBPyConnection):
-        self._connection = connection
-
-    def set(self, key: str, value: str) -> None:
-        raise NotImplementedError
-
-    def isModifiable(self, key: str) -> bool:
-        raise NotImplementedError
-
-    def unset(self, key: str) -> None:
-        raise NotImplementedError
-
-    def get(self, key: str, default: Union[Optional[str], _NoValueType] = _NoValue) -> str:
-        raise NotImplementedError
-
-
-__all__ = ["RuntimeConfig"]
pyduckdb/spark/sql/dataframe.py
DELETED
@@ -1,75 +0,0 @@
-from ..exception import ContributionsAcceptedError
-
-from typing import TYPE_CHECKING, List
-
-from .readwriter import DataFrameWriter
-from .types import Row, StructType
-from .type_utils import duckdb_to_spark_schema
-import duckdb
-
-if TYPE_CHECKING:
-    from .session import SparkSession
-
-
-class DataFrame:
-    def __init__(self, relation: duckdb.DuckDBPyRelation, session: "SparkSession"):
-        self.relation = relation
-        self.session = session
-        self._schema = duckdb_to_spark_schema(self.relation.columns, self.relation.types) if self.relation else None
-
-    def show(self) -> None:
-        self.relation.show()
-
-    def createOrReplaceTempView(self, name: str) -> None:
-        raise NotImplementedError
-
-    def createGlobalTempView(self, name: str) -> None:
-        raise NotImplementedError
-
-    @property
-    def schema(self) -> StructType:
-        """Returns the schema of this :class:`DataFrame` as a :class:`pyduckdb.spark.sql.types.StructType`.
-
-        .. versionadded:: 1.3.0
-
-        Examples
-        --------
-        >>> df.schema
-        StructType([StructField('age', IntegerType(), True),
-                    StructField('name', StringType(), True)])
-        """
-        return self._schema
-
-    @property
-    def write(self) -> DataFrameWriter:
-        return DataFrameWriter(self)
-
-    def printSchema(self):
-        raise ContributionsAcceptedError
-
-    def _cast_types(self, *types) -> "DataFrame":
-        existing_columns = self.relation.columns
-        types_count = len(types)
-        assert types_count == len(existing_columns)
-        cast_expressions = [f'"{existing}"::{target_type}' for existing, target_type in zip(existing_columns, types)]
-        cast_expressions = ', '.join(cast_expressions)
-        new_rel = self.relation.project(cast_expressions)
-        return DataFrame(new_rel, self.session)
-
-    def toDF(self, *cols) -> "DataFrame":
-        existing_columns = self.relation.columns
-        column_count = len(cols)
-        assert column_count == len(existing_columns)
-        projections = [f'"{existing}" as "{new}"' for existing, new in zip(existing_columns, cols)]
-        projections = ', '.join(projections)
-        new_rel = self.relation.project(projections)
-        return DataFrame(new_rel, self.session)
-
-    def collect(self) -> List[Row]:
-        columns = self.relation.columns
-        result = self.relation.fetchall()
-        rows = [Row(**dict(zip(columns, x))) for x in result]
-        return rows
-
-
-__all__ = ["DataFrame"]
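The replacement DataFrame in duckdb/experimental/spark/sql/dataframe.py is far larger (1389 lines added per the listing). A hedged usage sketch of the new module; the builder-style entry point and sql() method are assumed to mirror PySpark and are not verified against the 1.4.3.dev8 wheel:

    from duckdb.experimental.spark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()   # assumed PySpark-style builder
    df = spark.sql("select 42 as answer")        # assumed to return the new DataFrame
    df.show()
    print(df.collect())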