duckdb-0.8.2.dev3007-cp311-cp311-win_amd64.whl → duckdb-1.4.3.dev8-cp311-cp311-win_amd64.whl

This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in that registry.
Files changed (72)
  1. _duckdb-stubs/__init__.pyi +1478 -0
  2. _duckdb-stubs/_func.pyi +46 -0
  3. _duckdb-stubs/_sqltypes.pyi +75 -0
  4. duckdb/duckdb.cp311-win_amd64.pyd → _duckdb.cp311-win_amd64.pyd +0 -0
  5. adbc_driver_duckdb/__init__.py +10 -8
  6. adbc_driver_duckdb/dbapi.py +4 -5
  7. duckdb/__init__.py +250 -196
  8. duckdb/_dbapi_type_object.py +231 -0
  9. duckdb/_version.py +22 -0
  10. {pyduckdb → duckdb}/bytes_io_wrapper.py +12 -8
  11. duckdb/experimental/__init__.py +5 -0
  12. duckdb/experimental/spark/__init__.py +6 -0
  13. {pyduckdb → duckdb/experimental}/spark/_globals.py +8 -8
  14. duckdb/experimental/spark/_typing.py +46 -0
  15. duckdb/experimental/spark/conf.py +46 -0
  16. duckdb/experimental/spark/context.py +180 -0
  17. duckdb/experimental/spark/errors/__init__.py +70 -0
  18. duckdb/experimental/spark/errors/error_classes.py +918 -0
  19. duckdb/experimental/spark/errors/exceptions/__init__.py +16 -0
  20. duckdb/experimental/spark/errors/exceptions/base.py +168 -0
  21. duckdb/experimental/spark/errors/utils.py +111 -0
  22. duckdb/experimental/spark/exception.py +18 -0
  23. {pyduckdb → duckdb/experimental}/spark/sql/__init__.py +5 -5
  24. duckdb/experimental/spark/sql/_typing.py +86 -0
  25. duckdb/experimental/spark/sql/catalog.py +79 -0
  26. duckdb/experimental/spark/sql/column.py +361 -0
  27. duckdb/experimental/spark/sql/conf.py +24 -0
  28. duckdb/experimental/spark/sql/dataframe.py +1389 -0
  29. duckdb/experimental/spark/sql/functions.py +6195 -0
  30. duckdb/experimental/spark/sql/group.py +424 -0
  31. duckdb/experimental/spark/sql/readwriter.py +435 -0
  32. duckdb/experimental/spark/sql/session.py +297 -0
  33. duckdb/experimental/spark/sql/streaming.py +36 -0
  34. duckdb/experimental/spark/sql/type_utils.py +107 -0
  35. {pyduckdb → duckdb/experimental}/spark/sql/types.py +323 -342
  36. duckdb/experimental/spark/sql/udf.py +37 -0
  37. duckdb/filesystem.py +33 -0
  38. duckdb/func/__init__.py +3 -0
  39. duckdb/functional/__init__.py +12 -16
  40. duckdb/polars_io.py +284 -0
  41. duckdb/py.typed +0 -0
  42. duckdb/query_graph/__main__.py +358 -0
  43. duckdb/sqltypes/__init__.py +63 -0
  44. duckdb/typing/__init__.py +18 -6
  45. {pyduckdb → duckdb}/udf.py +10 -5
  46. duckdb/value/__init__.py +1 -0
  47. pyduckdb/value/constant.py → duckdb/value/constant/__init__.py +66 -57
  48. duckdb-1.4.3.dev8.dist-info/METADATA +88 -0
  49. duckdb-1.4.3.dev8.dist-info/RECORD +52 -0
  50. {duckdb-0.8.2.dev3007.dist-info → duckdb-1.4.3.dev8.dist-info}/WHEEL +1 -1
  51. duckdb-1.4.3.dev8.dist-info/licenses/LICENSE +7 -0
  52. duckdb-0.8.2.dev3007.dist-info/METADATA +0 -20
  53. duckdb-0.8.2.dev3007.dist-info/RECORD +0 -34
  54. duckdb-0.8.2.dev3007.dist-info/top_level.txt +0 -4
  55. duckdb-stubs/__init__.pyi +0 -574
  56. duckdb-stubs/functional/__init__.pyi +0 -33
  57. duckdb-stubs/typing/__init__.pyi +0 -35
  58. pyduckdb/__init__.py +0 -61
  59. pyduckdb/filesystem.py +0 -64
  60. pyduckdb/spark/__init__.py +0 -7
  61. pyduckdb/spark/conf.py +0 -45
  62. pyduckdb/spark/context.py +0 -162
  63. pyduckdb/spark/exception.py +0 -9
  64. pyduckdb/spark/sql/catalog.py +0 -78
  65. pyduckdb/spark/sql/conf.py +0 -23
  66. pyduckdb/spark/sql/dataframe.py +0 -75
  67. pyduckdb/spark/sql/readwriter.py +0 -180
  68. pyduckdb/spark/sql/session.py +0 -249
  69. pyduckdb/spark/sql/streaming.py +0 -37
  70. pyduckdb/spark/sql/type_utils.py +0 -104
  71. pyduckdb/spark/sql/udf.py +0 -9
  72. {pyduckdb → duckdb/experimental}/spark/LICENSE +0 -0
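The bulk of the change is the retirement of the old pyduckdb namespace: the Spark-style API moves under duckdb.experimental.spark, the typed value constants under duckdb.value.constant, and the type stubs move from duckdb-stubs to _duckdb-stubs. A minimal sketch of what the relocation looks like from user code, assuming the class names carry over unchanged (only the module paths are confirmed by the file list above):

# Old layout (0.8.2.dev3007) -- modules listed above as DELETED:
#   from pyduckdb.spark.sql.dataframe import DataFrame
#   from pyduckdb.value.constant import IntegerValue

# New layout (1.4.3.dev8) -- paths taken from the file list; exported names are assumed:
from duckdb.experimental.spark.sql.dataframe import DataFrame
from duckdb.value.constant import IntegerValue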
pyduckdb/__init__.py DELETED
@@ -1,61 +0,0 @@
- from .value.constant import (
-     Value,
-     NullValue,
-     BooleanValue,
-     UnsignedBinaryValue,
-     UnsignedShortValue,
-     UnsignedIntegerValue,
-     UnsignedLongValue,
-     BinaryValue,
-     ShortValue,
-     IntegerValue,
-     LongValue,
-     HugeIntegerValue,
-     FloatValue,
-     DoubleValue,
-     DecimalValue,
-     StringValue,
-     UUIDValue,
-     BitValue,
-     BlobValue,
-     DateValue,
-     IntervalValue,
-     TimestampValue,
-     TimestampSecondValue,
-     TimestampMilisecondValue,
-     TimestampNanosecondValue,
-     TimestampTimeZoneValue,
-     TimeValue,
-     TimeTimeZoneValue,
- )
-
- __all__ = [
-     "Value",
-     "NullValue",
-     "BooleanValue",
-     "UnsignedBinaryValue",
-     "UnsignedShortValue",
-     "UnsignedIntegerValue",
-     "UnsignedLongValue",
-     "BinaryValue",
-     "ShortValue",
-     "IntegerValue",
-     "LongValue",
-     "HugeIntegerValue",
-     "FloatValue",
-     "DoubleValue",
-     "DecimalValue",
-     "StringValue",
-     "UUIDValue",
-     "BitValue",
-     "BlobValue",
-     "DateValue",
-     "IntervalValue",
-     "TimestampValue",
-     "TimestampSecondValue",
-     "TimestampMilisecondValue",
-     "TimestampNanosecondValue",
-     "TimestampTimeZoneValue",
-     "TimeValue",
-     "TimeTimeZoneValue",
- ]
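The deleted top-level module only re-exported the typed value wrappers from pyduckdb.value.constant, which now live in duckdb.value.constant (file 47 above). A hedged sketch, assuming the constructors still take a single Python value as the names suggest:

from duckdb.value.constant import IntegerValue, StringValue  # new location per file 47

# Assumed single-argument constructors mirroring the names re-exported above.
answer = IntegerValue(42)
greeting = StringValue("hello")
print(answer, greeting)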
pyduckdb/filesystem.py DELETED
@@ -1,64 +0,0 @@
- from fsspec import filesystem, AbstractFileSystem
- from fsspec.implementations.memory import MemoryFileSystem
- from shutil import copyfileobj
- from .bytes_io_wrapper import BytesIOWrapper
- from io import TextIOBase
-
-
- def is_file_like(obj):
-     # We only care that we can read from the file
-     return hasattr(obj, "read") and hasattr(obj, "seek")
-
-
- class ModifiedMemoryFileSystem(MemoryFileSystem):
-     protocol = ('DUCKDB_INTERNAL_OBJECTSTORE',)
-     # defer to the original implementation that doesn't hardcode the protocol
-     _strip_protocol = classmethod(AbstractFileSystem._strip_protocol.__func__)
-
-     # Add this manually because it's apparently missing on windows???
-     def unstrip_protocol(self, name):
-         """Format FS-specific path to generic, including protocol"""
-         protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol
-         for protocol in protos:
-             if name.startswith(f"{protocol}://"):
-                 return name
-         return f"{protos[0]}://{name}"
-
-     def info(self, path, **kwargs):
-         path = self._strip_protocol(path)
-         if path in self.store:
-             filelike = self.store[path]
-             return {
-                 "name": path,
-                 "size": getattr(filelike, "size", 0),
-                 "type": "file",
-                 "created": getattr(filelike, "created", None),
-             }
-         else:
-             raise FileNotFoundError(path)
-
-     def _open(
-         self,
-         path,
-         mode="rb",
-         block_size=None,
-         autocommit=True,
-         cache_options=None,
-         **kwargs,
-     ):
-         path = self._strip_protocol(path)
-         if path in self.store:
-             f = self.store[path]
-             return f
-         else:
-             raise FileNotFoundError(path)
-
-     def add_file(self, object, path):
-         if not is_file_like(object):
-             raise ValueError("Can not read from a non file-like object")
-         path = self._strip_protocol(path)
-         if isinstance(object, TextIOBase):
-             # Wrap this so that we can return a bytes object from 'read'
-             self.store[path] = BytesIOWrapper(object)
-         else:
-             self.store[path] = object
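The deleted helper exposes Python file-like objects through an fsspec in-memory filesystem under DuckDB's internal protocol. A small sketch exercising the class exactly as defined above; it assumes fsspec is installed and uses the pre-removal import path:

from io import BytesIO
from pyduckdb.filesystem import ModifiedMemoryFileSystem  # pre-removal import path

fs = ModifiedMemoryFileSystem()
fs.add_file(BytesIO(b"a,b\n1,2\n"), "data.csv")           # stores the buffer under the stripped path

print(fs.info("DUCKDB_INTERNAL_OBJECTSTORE://data.csv"))  # protocol prefix is stripped before lookup
print(fs._open("data.csv").read())                        # returns the registered buffer's bytes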
pyduckdb/spark/__init__.py DELETED
@@ -1,7 +0,0 @@
- from .sql import SparkSession, DataFrame
- from .conf import SparkConf
- from .context import SparkContext
- from ._globals import _NoValue
- from .exception import ContributionsAcceptedError
-
- __all__ = ["SparkSession", "DataFrame", "SparkConf", "SparkContext", "ContributionsAcceptedError"]
pyduckdb/spark/conf.py DELETED
@@ -1,45 +0,0 @@
1
- from typing import TYPE_CHECKING, Optional, List, Tuple
2
- from pyduckdb.spark.exception import ContributionsAcceptedError
3
-
4
-
5
- class SparkConf:
6
- def __init__(self):
7
- raise NotImplementedError
8
-
9
- def contains(self, key: str) -> bool:
10
- raise ContributionsAcceptedError
11
-
12
- def get(self, key: str, defaultValue: Optional[str] = None) -> Optional[str]:
13
- raise ContributionsAcceptedError
14
-
15
- def getAll(self) -> List[Tuple[str, str]]:
16
- raise ContributionsAcceptedError
17
-
18
- def set(self, key: str, value: str) -> "SparkConf":
19
- raise ContributionsAcceptedError
20
-
21
- def setAll(self, pairs: List[Tuple[str, str]]) -> "SparkConf":
22
- raise ContributionsAcceptedError
23
-
24
- def setAppName(self, value: str) -> "SparkConf":
25
- raise ContributionsAcceptedError
26
-
27
- def setExecutorEnv(
28
- self, key: Optional[str] = None, value: Optional[str] = None, pairs: Optional[List[Tuple[str, str]]] = None
29
- ) -> "SparkConf":
30
- raise ContributionsAcceptedError
31
-
32
- def setIfMissing(self, key: str, value: str) -> "SparkConf":
33
- raise ContributionsAcceptedError
34
-
35
- def setMaster(self, value: str) -> "SparkConf":
36
- raise ContributionsAcceptedError
37
-
38
- def setSparkHome(self, value: str) -> "SparkConf":
39
- raise ContributionsAcceptedError
40
-
41
- def toDebugString(self) -> str:
42
- raise ContributionsAcceptedError
43
-
44
-
45
- __all__ = ["SparkConf"]
pyduckdb/spark/context.py DELETED
@@ -1,162 +0,0 @@
- from typing import Optional
- import duckdb
- from duckdb import DuckDBPyConnection
-
- from pyduckdb.spark.exception import ContributionsAcceptedError
- from pyduckdb.spark.conf import SparkConf
-
-
- class SparkContext:
-     def __init__(self, master: str):
-         self._connection = duckdb.connect(master)
-
-     @property
-     def connection(self) -> DuckDBPyConnection:
-         return self._connection
-
-     def stop(self) -> None:
-         self._connection.close()
-
-     @classmethod
-     def getOrCreate(cls, conf: Optional[SparkConf] = None) -> "SparkContext":
-         raise ContributionsAcceptedError
-
-     @classmethod
-     def setSystemProperty(cls, key: str, value: str) -> None:
-         raise ContributionsAcceptedError
-
-     @property
-     def applicationId(self) -> str:
-         raise ContributionsAcceptedError
-
-     @property
-     def defaultMinPartitions(self) -> int:
-         raise ContributionsAcceptedError
-
-     @property
-     def defaultParallelism(self) -> int:
-         raise ContributionsAcceptedError
-
-     # @property
-     # def resources(self) -> Dict[str, ResourceInformation]:
-     #     raise ContributionsAcceptedError
-
-     @property
-     def startTime(self) -> str:
-         raise ContributionsAcceptedError
-
-     @property
-     def uiWebUrl(self) -> str:
-         raise ContributionsAcceptedError
-
-     @property
-     def version(self) -> str:
-         raise ContributionsAcceptedError
-
-     def __repr__(self) -> str:
-         raise ContributionsAcceptedError
-
-     # def accumulator(self, value: ~T, accum_param: Optional[ForwardRef('AccumulatorParam[T]')] = None) -> 'Accumulator[T]':
-     #     pass
-
-     def addArchive(self, path: str) -> None:
-         raise ContributionsAcceptedError
-
-     def addFile(self, path: str, recursive: bool = False) -> None:
-         raise ContributionsAcceptedError
-
-     def addPyFile(self, path: str) -> None:
-         raise ContributionsAcceptedError
-
-     # def binaryFiles(self, path: str, minPartitions: Optional[int] = None) -> pyduckdb.spark.rdd.RDD[typing.Tuple[str, bytes]]:
-     #     pass
-
-     # def binaryRecords(self, path: str, recordLength: int) -> pyduckdb.spark.rdd.RDD[bytes]:
-     #     pass
-
-     # def broadcast(self, value: ~T) -> 'Broadcast[T]':
-     #     pass
-
-     def cancelAllJobs(self) -> None:
-         raise ContributionsAcceptedError
-
-     def cancelJobGroup(self, groupId: str) -> None:
-         raise ContributionsAcceptedError
-
-     def dump_profiles(self, path: str) -> None:
-         raise ContributionsAcceptedError
-
-     # def emptyRDD(self) -> pyduckdb.spark.rdd.RDD[typing.Any]:
-     #     pass
-
-     def getCheckpointDir(self) -> Optional[str]:
-         raise ContributionsAcceptedError
-
-     def getConf(self) -> SparkConf:
-         raise ContributionsAcceptedError
-
-     def getLocalProperty(self, key: str) -> Optional[str]:
-         raise ContributionsAcceptedError
-
-     # def hadoopFile(self, path: str, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
-     #     pass
-
-     # def hadoopRDD(self, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
-     #     pass
-
-     # def newAPIHadoopFile(self, path: str, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
-     #     pass
-
-     # def newAPIHadoopRDD(self, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
-     #     pass
-
-     # def parallelize(self, c: Iterable[~T], numSlices: Optional[int] = None) -> pyspark.rdd.RDD[~T]:
-     #     pass
-
-     # def pickleFile(self, name: str, minPartitions: Optional[int] = None) -> pyspark.rdd.RDD[typing.Any]:
-     #     pass
-
-     # def range(self, start: int, end: Optional[int] = None, step: int = 1, numSlices: Optional[int] = None) -> pyspark.rdd.RDD[int]:
-     #     pass
-
-     # def runJob(self, rdd: pyspark.rdd.RDD[~T], partitionFunc: Callable[[Iterable[~T]], Iterable[~U]], partitions: Optional[Sequence[int]] = None, allowLocal: bool = False) -> List[~U]:
-     #     pass
-
-     # def sequenceFile(self, path: str, keyClass: Optional[str] = None, valueClass: Optional[str] = None, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, minSplits: Optional[int] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
-     #     pass
-
-     def setCheckpointDir(self, dirName: str) -> None:
-         raise ContributionsAcceptedError
-
-     def setJobDescription(self, value: str) -> None:
-         raise ContributionsAcceptedError
-
-     def setJobGroup(self, groupId: str, description: str, interruptOnCancel: bool = False) -> None:
-         raise ContributionsAcceptedError
-
-     def setLocalProperty(self, key: str, value: str) -> None:
-         raise ContributionsAcceptedError
-
-     def setLogLevel(self, logLevel: str) -> None:
-         raise ContributionsAcceptedError
-
-     def show_profiles(self) -> None:
-         raise ContributionsAcceptedError
-
-     def sparkUser(self) -> str:
-         raise ContributionsAcceptedError
-
-     # def statusTracker(self) -> pyduckdb.spark.status.StatusTracker:
-     #     raise ContributionsAcceptedError
-
-     # def textFile(self, name: str, minPartitions: Optional[int] = None, use_unicode: bool = True) -> pyspark.rdd.RDD[str]:
-     #     pass
-
-     # def union(self, rdds: List[pyspark.rdd.RDD[~T]]) -> pyspark.rdd.RDD[~T]:
-     #     pass
-
-     # def wholeTextFiles(self, path: str, minPartitions: Optional[int] = None, use_unicode: bool = True) -> pyspark.rdd.RDD[typing.Tuple[str, str]]:
-     #     pass
-
-
- __all__ = ["SparkContext"]
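The deleted SparkContext is a thin wrapper around duckdb.connect; everything beyond opening and closing the connection raises ContributionsAcceptedError. A minimal sketch against the code above, using the pre-removal import path:

from pyduckdb.spark.context import SparkContext  # pre-removal import path

sc = SparkContext(":memory:")                                # __init__ just calls duckdb.connect(master)
print(sc.connection.sql("select 42 as answer").fetchall())   # underlying DuckDBPyConnection
sc.stop()                                                    # closes the connection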
pyduckdb/spark/exception.py DELETED
@@ -1,9 +0,0 @@
- class ContributionsAcceptedError(NotImplementedError):
-     """
-     This method is not planned to be implemented, if you would like to implement this method
-     or show your interest in this method to other members of the community,
-     feel free to open up a PR or a Discussion over on https://github.com/duckdb/duckdb
-     """
-
-
- __all__ = ["ContributionsAcceptedError"]
pyduckdb/spark/sql/catalog.py DELETED
@@ -1,78 +0,0 @@
- from typing import List, NamedTuple, Optional
- from pyduckdb.spark.sql.session import SparkSession
-
-
- class Database(NamedTuple):
-     name: str
-     description: Optional[str]
-     locationUri: str
-
-
- class Table(NamedTuple):
-     name: str
-     database: Optional[str]
-     description: Optional[str]
-     tableType: str
-     isTemporary: bool
-
-
- class Column(NamedTuple):
-     name: str
-     description: Optional[str]
-     dataType: str
-     nullable: bool
-     isPartition: bool
-     isBucket: bool
-
-
- class Function(NamedTuple):
-     name: str
-     description: Optional[str]
-     className: str
-     isTemporary: bool
-
-
- class Catalog:
-     def __init__(self, session: SparkSession):
-         self._session = session
-
-     def listDatabases(self) -> List[Database]:
-         res = self._session.conn.sql('select * from duckdb_databases()').fetchall()
-
-         def transform_to_database(x) -> Database:
-             return Database(name=x[0], description=None, locationUri='')
-
-         databases = [transform_to_database(x) for x in res]
-         return databases
-
-     def listTables(self) -> List[Table]:
-         res = self._session.conn.sql('select * from duckdb_tables()').fetchall()
-
-         def transform_to_table(x) -> Table:
-             return Table(name=x[4], database=x[0], description=x[13], tableType='', isTemporary=x[7])
-
-         tables = [transform_to_table(x) for x in res]
-         return tables
-
-     def listColumns(self, tableName: str, dbName: Optional[str] = None) -> List[Column]:
-         query = f"""
-         select * from duckdb_columns() where table_name = '{tableName}'
-         """
-         if dbName:
-             query += f" and database_name = '{dbName}'"
-         res = self._session.conn.sql(query).fetchall()
-
-         def transform_to_column(x) -> Column:
-             return Column(name=x[6], description=None, dataType=x[11], nullable=x[8], isPartition=False, isBucket=False)
-
-         columns = [transform_to_column(x) for x in res]
-         return columns
-
-     def listFunctions(self, dbName: Optional[str] = None) -> List[Function]:
-         raise NotImplementedError
-
-     def setCurrentDatabase(self, dbName: str) -> None:
-         raise NotImplementedError
-
-
- __all__ = ["Catalog", "Table", "Column", "Function", "Database"]
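The deleted Catalog answers its listing calls by querying DuckDB's duckdb_databases(), duckdb_tables() and duckdb_columns() table functions through the session's connection. A hedged sketch using a stand-in session, since the real SparkSession is not shown in this diff and Catalog only reads its `.conn` attribute:

import duckdb
from pyduckdb.spark.sql.catalog import Catalog  # pre-removal import path

class _StubSession:
    # Stand-in for the deleted SparkSession; Catalog only uses `.conn`.
    def __init__(self):
        self.conn = duckdb.connect()

session = _StubSession()
session.conn.sql("create table t (i integer, s varchar)")

catalog = Catalog(session)             # class from the deleted module above
print(catalog.listDatabases())         # e.g. [Database(name='memory', ...), ...]
print(catalog.listTables())            # e.g. [Table(name='t', ...)]
print(catalog.listColumns("t"))        # e.g. [Column(name='i', dataType='INTEGER', ...), ...]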
pyduckdb/spark/sql/conf.py DELETED
@@ -1,23 +0,0 @@
- from typing import Optional, Union
- from pyduckdb.spark._globals import _NoValueType, _NoValue
- from duckdb import DuckDBPyConnection
-
-
- class RuntimeConfig:
-     def __init__(self, connection: DuckDBPyConnection):
-         self._connection = connection
-
-     def set(self, key: str, value: str) -> None:
-         raise NotImplementedError
-
-     def isModifiable(self, key: str) -> bool:
-         raise NotImplementedError
-
-     def unset(self, key: str) -> None:
-         raise NotImplementedError
-
-     def get(self, key: str, default: Union[Optional[str], _NoValueType] = _NoValue) -> str:
-         raise NotImplementedError
-
-
- __all__ = ["RuntimeConfig"]
pyduckdb/spark/sql/dataframe.py DELETED
@@ -1,75 +0,0 @@
- from ..exception import ContributionsAcceptedError
-
- from typing import TYPE_CHECKING, List
-
- from .readwriter import DataFrameWriter
- from .types import Row, StructType
- from .type_utils import duckdb_to_spark_schema
- import duckdb
-
- if TYPE_CHECKING:
-     from .session import SparkSession
-
-
- class DataFrame:
-     def __init__(self, relation: duckdb.DuckDBPyRelation, session: "SparkSession"):
-         self.relation = relation
-         self.session = session
-         self._schema = duckdb_to_spark_schema(self.relation.columns, self.relation.types) if self.relation else None
-
-     def show(self) -> None:
-         self.relation.show()
-
-     def createOrReplaceTempView(self, name: str) -> None:
-         raise NotImplementedError
-
-     def createGlobalTempView(self, name: str) -> None:
-         raise NotImplementedError
-
-     @property
-     def schema(self) -> StructType:
-         """Returns the schema of this :class:`DataFrame` as a :class:`pyduckdb.spark.sql.types.StructType`.
-
-         .. versionadded:: 1.3.0
-
-         Examples
-         --------
-         >>> df.schema
-         StructType([StructField('age', IntegerType(), True),
-                     StructField('name', StringType(), True)])
-         """
-         return self._schema
-
-     @property
-     def write(self) -> DataFrameWriter:
-         return DataFrameWriter(self)
-
-     def printSchema(self):
-         raise ContributionsAcceptedError
-
-     def _cast_types(self, *types) -> "DataFrame":
-         existing_columns = self.relation.columns
-         types_count = len(types)
-         assert types_count == len(existing_columns)
-         cast_expressions = [f'"{existing}"::{target_type}' for existing, target_type in zip(existing_columns, types)]
-         cast_expressions = ', '.join(cast_expressions)
-         new_rel = self.relation.project(cast_expressions)
-         return DataFrame(new_rel, self.session)
-
-     def toDF(self, *cols) -> "DataFrame":
-         existing_columns = self.relation.columns
-         column_count = len(cols)
-         assert column_count == len(existing_columns)
-         projections = [f'"{existing}" as "{new}"' for existing, new in zip(existing_columns, cols)]
-         projections = ', '.join(projections)
-         new_rel = self.relation.project(projections)
-         return DataFrame(new_rel, self.session)
-
-     def collect(self) -> List[Row]:
-         columns = self.relation.columns
-         result = self.relation.fetchall()
-         rows = [Row(**dict(zip(columns, x))) for x in result]
-         return rows
-
-
- __all__ = ["DataFrame"]
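The deleted DataFrame is a thin projection-and-fetch wrapper over a DuckDBPyRelation. A hedged sketch of the methods shown above; the session argument is only stored by the constructor and never touched by the calls below, so None is passed for brevity:

import duckdb
from pyduckdb.spark.sql.dataframe import DataFrame  # pre-removal import path

rel = duckdb.sql("select 1 as a, 'x' as b")

# None stands in for the SparkSession: the methods used here only read self.relation.
df = DataFrame(rel, None)
renamed = df.toDF("first", "second")   # project() with column aliases
print(renamed.collect())               # e.g. [Row(first=1, second='x')]
renamed.show()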