duckdb-0.8.2.dev3007-cp311-cp311-win_amd64.whl → duckdb-1.4.3.dev8-cp311-cp311-win_amd64.whl

This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (72)
  1. _duckdb-stubs/__init__.pyi +1478 -0
  2. _duckdb-stubs/_func.pyi +46 -0
  3. _duckdb-stubs/_sqltypes.pyi +75 -0
  4. duckdb/duckdb.cp311-win_amd64.pyd → _duckdb.cp311-win_amd64.pyd +0 -0
  5. adbc_driver_duckdb/__init__.py +10 -8
  6. adbc_driver_duckdb/dbapi.py +4 -5
  7. duckdb/__init__.py +250 -196
  8. duckdb/_dbapi_type_object.py +231 -0
  9. duckdb/_version.py +22 -0
  10. {pyduckdb → duckdb}/bytes_io_wrapper.py +12 -8
  11. duckdb/experimental/__init__.py +5 -0
  12. duckdb/experimental/spark/__init__.py +6 -0
  13. {pyduckdb → duckdb/experimental}/spark/_globals.py +8 -8
  14. duckdb/experimental/spark/_typing.py +46 -0
  15. duckdb/experimental/spark/conf.py +46 -0
  16. duckdb/experimental/spark/context.py +180 -0
  17. duckdb/experimental/spark/errors/__init__.py +70 -0
  18. duckdb/experimental/spark/errors/error_classes.py +918 -0
  19. duckdb/experimental/spark/errors/exceptions/__init__.py +16 -0
  20. duckdb/experimental/spark/errors/exceptions/base.py +168 -0
  21. duckdb/experimental/spark/errors/utils.py +111 -0
  22. duckdb/experimental/spark/exception.py +18 -0
  23. {pyduckdb → duckdb/experimental}/spark/sql/__init__.py +5 -5
  24. duckdb/experimental/spark/sql/_typing.py +86 -0
  25. duckdb/experimental/spark/sql/catalog.py +79 -0
  26. duckdb/experimental/spark/sql/column.py +361 -0
  27. duckdb/experimental/spark/sql/conf.py +24 -0
  28. duckdb/experimental/spark/sql/dataframe.py +1389 -0
  29. duckdb/experimental/spark/sql/functions.py +6195 -0
  30. duckdb/experimental/spark/sql/group.py +424 -0
  31. duckdb/experimental/spark/sql/readwriter.py +435 -0
  32. duckdb/experimental/spark/sql/session.py +297 -0
  33. duckdb/experimental/spark/sql/streaming.py +36 -0
  34. duckdb/experimental/spark/sql/type_utils.py +107 -0
  35. {pyduckdb → duckdb/experimental}/spark/sql/types.py +323 -342
  36. duckdb/experimental/spark/sql/udf.py +37 -0
  37. duckdb/filesystem.py +33 -0
  38. duckdb/func/__init__.py +3 -0
  39. duckdb/functional/__init__.py +12 -16
  40. duckdb/polars_io.py +284 -0
  41. duckdb/py.typed +0 -0
  42. duckdb/query_graph/__main__.py +358 -0
  43. duckdb/sqltypes/__init__.py +63 -0
  44. duckdb/typing/__init__.py +18 -6
  45. {pyduckdb → duckdb}/udf.py +10 -5
  46. duckdb/value/__init__.py +1 -0
  47. pyduckdb/value/constant.py → duckdb/value/constant/__init__.py +66 -57
  48. duckdb-1.4.3.dev8.dist-info/METADATA +88 -0
  49. duckdb-1.4.3.dev8.dist-info/RECORD +52 -0
  50. {duckdb-0.8.2.dev3007.dist-info → duckdb-1.4.3.dev8.dist-info}/WHEEL +1 -1
  51. duckdb-1.4.3.dev8.dist-info/licenses/LICENSE +7 -0
  52. duckdb-0.8.2.dev3007.dist-info/METADATA +0 -20
  53. duckdb-0.8.2.dev3007.dist-info/RECORD +0 -34
  54. duckdb-0.8.2.dev3007.dist-info/top_level.txt +0 -4
  55. duckdb-stubs/__init__.pyi +0 -574
  56. duckdb-stubs/functional/__init__.pyi +0 -33
  57. duckdb-stubs/typing/__init__.pyi +0 -35
  58. pyduckdb/__init__.py +0 -61
  59. pyduckdb/filesystem.py +0 -64
  60. pyduckdb/spark/__init__.py +0 -7
  61. pyduckdb/spark/conf.py +0 -45
  62. pyduckdb/spark/context.py +0 -162
  63. pyduckdb/spark/exception.py +0 -9
  64. pyduckdb/spark/sql/catalog.py +0 -78
  65. pyduckdb/spark/sql/conf.py +0 -23
  66. pyduckdb/spark/sql/dataframe.py +0 -75
  67. pyduckdb/spark/sql/readwriter.py +0 -180
  68. pyduckdb/spark/sql/session.py +0 -249
  69. pyduckdb/spark/sql/streaming.py +0 -37
  70. pyduckdb/spark/sql/type_utils.py +0 -104
  71. pyduckdb/spark/sql/udf.py +0 -9
  72. {pyduckdb → duckdb/experimental}/spark/LICENSE +0 -0
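
The list above amounts to a package reorganization: the standalone `pyduckdb` package disappears, its Spark compatibility layer moves under `duckdb.experimental.spark`, the Value wrapper moves from `pyduckdb/value/constant.py` to `duckdb/value/constant/__init__.py`, and the type stubs move from `duckdb-stubs` to `_duckdb-stubs`. A minimal sketch of how imports shift under the new layout, assuming the moved modules keep the public names shown in the deleted code below (illustrative only, not taken from the package documentation):

    # Old layout (0.8.2.dev3007): the Spark shim lived in the separate `pyduckdb` package.
    #   from pyduckdb.spark.sql.session import SparkSession
    #   from pyduckdb.value.constant import Value

    # New layout (1.4.3.dev8): everything ships inside `duckdb` itself.
    from duckdb.experimental.spark.sql.session import SparkSession  # assumed to keep the same class name
    from duckdb.value.constant import Value                         # assumed to keep the same class name

    spark = SparkSession.builder.getOrCreate()  # builder API as defined in the deleted session.py below
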
pyduckdb/spark/sql/readwriter.py DELETED
@@ -1,180 +0,0 @@
- from typing import TYPE_CHECKING, Iterable, Union, List, Optional, cast
- from pyduckdb.spark.sql.types import StructType
- from ..exception import ContributionsAcceptedError
-
- PrimitiveType = Union[bool, float, int, str]
- OptionalPrimitiveType = Optional[PrimitiveType]
-
- if TYPE_CHECKING:
-     from pyduckdb.spark.sql.dataframe import DataFrame
-     from pyduckdb.spark.sql.session import SparkSession
-
-
- class DataFrameWriter:
-     def __init__(self, dataframe: "DataFrame"):
-         self.dataframe = dataframe
-
-     def saveAsTable(self, table_name: str) -> None:
-         relation = self.dataframe.relation
-         relation.create(table_name)
-
-
- class DataFrameReader:
-     def __init__(self, session: "SparkSession"):
-         self.session = session
-
-     def load(
-         self,
-         path: Optional[Union[str, List[str]]] = None,
-         format: Optional[str] = None,
-         schema: Optional[Union[StructType, str]] = None,
-         **options: OptionalPrimitiveType,
-     ) -> "DataFrame":
-         from pyduckdb.spark.sql.dataframe import DataFrame
-
-         if not isinstance(path, str):
-             raise ImportError
-         if options:
-             raise ContributionsAcceptedError
-
-         rel = None
-         if format:
-             format = format.lower()
-             if format == 'csv' or format == 'tsv':
-                 rel = self.session.conn.read_csv(path)
-             elif format == 'json':
-                 rel = self.session.conn.read_json(path)
-             elif format == 'parquet':
-                 rel = self.session.conn.read_parquet(path)
-             else:
-                 raise ContributionsAcceptedError
-         else:
-             rel = self.session.conn.sql(f'select * from {path}')
-         df = DataFrame(rel, self.session)
-         if schema:
-             if not isinstance(schema, StructType):
-                 raise ContributionsAcceptedError
-             schema = cast(StructType, schema)
-             types, names = schema.extract_types_and_names()
-             df = df._cast_types(types)
-             df = df.toDF(names)
-         raise NotImplementedError
-
-     def csv(
-         self,
-         path: Union[str, List[str]],
-         schema: Optional[Union[StructType, str]] = None,
-         sep: Optional[str] = None,
-         encoding: Optional[str] = None,
-         quote: Optional[str] = None,
-         escape: Optional[str] = None,
-         comment: Optional[str] = None,
-         header: Optional[Union[bool, str]] = None,
-         inferSchema: Optional[Union[bool, str]] = None,
-         ignoreLeadingWhiteSpace: Optional[Union[bool, str]] = None,
-         ignoreTrailingWhiteSpace: Optional[Union[bool, str]] = None,
-         nullValue: Optional[str] = None,
-         nanValue: Optional[str] = None,
-         positiveInf: Optional[str] = None,
-         negativeInf: Optional[str] = None,
-         dateFormat: Optional[str] = None,
-         timestampFormat: Optional[str] = None,
-         maxColumns: Optional[Union[int, str]] = None,
-         maxCharsPerColumn: Optional[Union[int, str]] = None,
-         maxMalformedLogPerPartition: Optional[Union[int, str]] = None,
-         mode: Optional[str] = None,
-         columnNameOfCorruptRecord: Optional[str] = None,
-         multiLine: Optional[Union[bool, str]] = None,
-         charToEscapeQuoteEscaping: Optional[str] = None,
-         samplingRatio: Optional[Union[float, str]] = None,
-         enforceSchema: Optional[Union[bool, str]] = None,
-         emptyValue: Optional[str] = None,
-         locale: Optional[str] = None,
-         lineSep: Optional[str] = None,
-         pathGlobFilter: Optional[Union[bool, str]] = None,
-         recursiveFileLookup: Optional[Union[bool, str]] = None,
-         modifiedBefore: Optional[Union[bool, str]] = None,
-         modifiedAfter: Optional[Union[bool, str]] = None,
-         unescapedQuoteHandling: Optional[str] = None,
-     ) -> "DataFrame":
-         if not isinstance(path, str):
-             raise NotImplementedError
-         if schema and not isinstance(schema, StructType):
-             raise ContributionsAcceptedError
-         if comment:
-             raise ContributionsAcceptedError
-         if inferSchema:
-             raise ContributionsAcceptedError
-         if ignoreLeadingWhiteSpace:
-             raise ContributionsAcceptedError
-         if ignoreTrailingWhiteSpace:
-             raise ContributionsAcceptedError
-         if nanValue:
-             raise ConnectionAbortedError
-         if positiveInf:
-             raise ConnectionAbortedError
-         if negativeInf:
-             raise ConnectionAbortedError
-         if negativeInf:
-             raise ConnectionAbortedError
-         if maxColumns:
-             raise ContributionsAcceptedError
-         if maxCharsPerColumn:
-             raise ContributionsAcceptedError
-         if maxMalformedLogPerPartition:
-             raise ContributionsAcceptedError
-         if mode:
-             raise ContributionsAcceptedError
-         if columnNameOfCorruptRecord:
-             raise ContributionsAcceptedError
-         if multiLine:
-             raise ContributionsAcceptedError
-         if charToEscapeQuoteEscaping:
-             raise ContributionsAcceptedError
-         if samplingRatio:
-             raise ContributionsAcceptedError
-         if enforceSchema:
-             raise ContributionsAcceptedError
-         if emptyValue:
-             raise ContributionsAcceptedError
-         if locale:
-             raise ContributionsAcceptedError
-         if pathGlobFilter:
-             raise ContributionsAcceptedError
-         if recursiveFileLookup:
-             raise ContributionsAcceptedError
-         if modifiedBefore:
-             raise ContributionsAcceptedError
-         if modifiedAfter:
-             raise ContributionsAcceptedError
-         if unescapedQuoteHandling:
-             raise ContributionsAcceptedError
-         if lineSep:
-             # We have support for custom newline, just needs to be ported to 'read_csv'
-             raise NotImplementedError
-
-         dtype = None
-         names = None
-         if schema:
-             schema = cast(StructType, schema)
-             dtype, names = schema.extract_types_and_names()
-
-         rel = self.session.conn.read_csv(
-             path,
-             header=header if isinstance(header, bool) else header == "True",
-             sep=sep,
-             dtype=dtype,
-             na_values=nullValue,
-             quotechar=quote,
-             escapechar=escape,
-             encoding=encoding,
-             date_format=dateFormat,
-             timestamp_format=timestampFormat,
-         )
-         df = DataFrame(rel, self.session)
-         if names:
-             df = df.toDF(*names)
-         return df
-
-
- __all__ = ["DataFrameWriter", "DataFrameReader"]
pyduckdb/spark/sql/session.py DELETED
@@ -1,249 +0,0 @@
- from typing import Optional, List, Tuple, Any, Union, Iterable, TYPE_CHECKING
- import uuid
-
- if TYPE_CHECKING:
-     from pyduckdb.spark.sql.catalog import Catalog
-     from pandas.core.frame import DataFrame as PandasDataFrame
-
- from pyduckdb.spark.exception import ContributionsAcceptedError
-
- from pyduckdb.spark.sql.types import StructType, AtomicType, DataType
- from pyduckdb.spark.conf import SparkConf
- from pyduckdb.spark.sql.dataframe import DataFrame
- from pyduckdb.spark.sql.conf import RuntimeConfig
- from pyduckdb.spark.sql.readwriter import DataFrameReader
- from pyduckdb.spark.context import SparkContext
- from pyduckdb.spark.sql.udf import UDFRegistration
- from pyduckdb.spark.sql.streaming import DataStreamReader
- import duckdb
-
- # In spark:
- # SparkSession holds a SparkContext
- # SparkContext gets created from SparkConf
- # At this level the check is made to determine whether the instance already exists and just needs to be retrieved or it needs to be created
-
- # For us this is done inside of `duckdb.connect`, based on the passed in path + configuration
- # SparkContext can be compared to our Connection class, and SparkConf to our ClientContext class
-
-
- # data is a List of rows
- # every value in each row needs to be turned into a Value
- def _combine_data_and_schema(data: Iterable[Any], schema: StructType):
-     from pyduckdb import Value
-
-     new_data = []
-     for row in data:
-         new_row = [Value(x, dtype.duckdb_type) for x, dtype in zip(row, [y.dataType for y in schema])]
-         new_data.append(new_row)
-     return new_data
-
-
- class SparkSession:
-     def __init__(self, context: SparkContext):
-         self.conn = context.connection
-         self._context = context
-         self._conf = RuntimeConfig(self.conn)
-
-     def _create_dataframe(self, data: Union[Iterable[Any], "PandasDataFrame"]) -> DataFrame:
-         try:
-             import pandas
-
-             has_pandas = True
-         except:
-             has_pandas = False
-         if has_pandas and isinstance(data, pandas.DataFrame):
-             unique_name = f'pyspark_pandas_df_{uuid.uuid1()}'
-             self.conn.register(unique_name, data)
-             return DataFrame(self.conn.sql(f'select * from "{unique_name}"'), self)
-
-         def verify_tuple_integrity(tuples):
-             if len(tuples) <= 1:
-                 return
-             assert all([len(x) == len(tuples[0]) for x in tuples[1:]])
-
-         if not isinstance(data, list):
-             data = list(data)
-         verify_tuple_integrity(data)
-
-         def construct_query(tuples) -> str:
-             def construct_values_list(row, start_param_idx):
-                 parameter_count = len(row)
-                 parameters = [f'${x+start_param_idx}' for x in range(parameter_count)]
-                 parameters = '(' + ', '.join(parameters) + ')'
-                 return parameters
-
-             row_size = len(tuples[0])
-             values_list = [construct_values_list(x, 1 + (i * row_size)) for i, x in enumerate(tuples)]
-             values_list = ', '.join(values_list)
-
-             query = f"""
-                 select * from (values {values_list})
-             """
-             return query
-
-         query = construct_query(data)
-
-         def construct_parameters(tuples):
-             parameters = []
-             for row in tuples:
-                 parameters.extend(list(row))
-             return parameters
-
-         parameters = construct_parameters(data)
-
-         rel = self.conn.sql(query, params=parameters)
-         return DataFrame(rel, self)
-
-     def createDataFrame(
-         self,
-         data: Union["PandasDataFrame", Iterable[Any]],
-         schema: Optional[Union[StructType, List[str]]] = None,
-         samplingRatio: Optional[float] = None,
-         verifySchema: bool = True,
-     ) -> DataFrame:
-         if samplingRatio:
-             raise NotImplementedError
-         if not verifySchema:
-             raise NotImplementedError
-         types = None
-         names = None
-         if schema:
-             if isinstance(schema, StructType):
-                 types, names = schema.extract_types_and_names()
-             else:
-                 names = schema
-
-         try:
-             import pandas
-
-             has_pandas = True
-         except:
-             has_pandas = False
-         # Falsey check on pandas dataframe is not defined, so first check if it's not a pandas dataframe
-         # Then check if 'data' is None or []
-         # Finally check if a schema was provided
-         is_empty = False
-         if (not has_pandas or (has_pandas and not isinstance(data, pandas.DataFrame))) and not data and names:
-             # Create NULLs for every type in our the dataframe
-             is_empty = True
-             data = [tuple(None for _ in names)]
-
-         if schema and isinstance(schema, StructType):
-             # Transform the data into Values to combine the data+schema
-             data = _combine_data_and_schema(data, schema)
-
-         df = self._create_dataframe(data)
-         if is_empty:
-             rel = df.relation
-             # Add impossible where clause
-             rel = rel.filter('1=0')
-             df = DataFrame(rel, self)
-
-         # Cast to types
-         if types:
-             df = df._cast_types(*types)
-         # Alias to names
-         if names:
-             df = df.toDF(*names)
-         return df
-
-     def newSession(self) -> "SparkSession":
-         return SparkSession(self._context)
-
-     def range(
-         self, start: int, end: Optional[int] = None, step: int = 1, numPartitions: Optional[int] = None
-     ) -> "DataFrame":
-         raise ContributionsAcceptedError
-
-     def sql(self, sqlQuery: str, **kwargs: Any) -> DataFrame:
-         if kwargs:
-             raise NotImplementedError
-         relation = self.conn.sql(sqlQuery)
-         return DataFrame(relation, self)
-
-     def stop(self) -> None:
-         self._context.stop()
-
-     def table(self, tableName: str) -> DataFrame:
-         relation = self.conn.table(tableName)
-         return DataFrame(relation, self)
-
-     def getActiveSession(self) -> "SparkSession":
-         return self
-
-     @property
-     def catalog(self) -> "Catalog":
-         if not hasattr(self, "_catalog"):
-             from pyduckdb.spark.sql.catalog import Catalog
-
-             self._catalog = Catalog(self)
-         return self._catalog
-
-     @property
-     def conf(self) -> RuntimeConfig:
-         return self._conf
-
-     @property
-     def read(self) -> DataFrameReader:
-         return DataFrameReader(self)
-
-     @property
-     def readStream(self) -> DataStreamReader:
-         return DataStreamReader(self)
-
-     @property
-     def sparkContext(self) -> SparkContext:
-         return self._context
-
-     @property
-     def streams(self) -> Any:
-         raise ContributionsAcceptedError
-
-     @property
-     def udf(self) -> UDFRegistration:
-         return UDFRegistration()
-
-     @property
-     def version(self) -> str:
-         return '1.0.0'
-
-     class Builder:
-         def __init__(self):
-             self.name = "builder"
-             self._master = ':memory:'
-             self._config = {}
-
-         def master(self, name: str) -> "SparkSession.Builder":
-             self._master = name
-             return self
-
-         def appName(self, name: str) -> "SparkSession.Builder":
-             # no-op
-             return self
-
-         def remote(self, url: str) -> "SparkSession.Builder":
-             # no-op
-             return self
-
-         def getOrCreate(self) -> "SparkSession":
-             # TODO: use the config to pass in methods to 'connect'
-             context = SparkContext(self._master)
-             return SparkSession(context)
-
-         def config(
-             self, key: Optional[str] = None, value: Optional[Any] = None, conf: Optional[SparkConf] = None
-         ) -> "SparkSession.Builder":
-             if conf:
-                 raise NotImplementedError
-             if key and value:
-                 self._config[key] = value
-             return self
-
-         def enableHiveSupport(self) -> "SparkSession.Builder":
-             # no-op
-             return self
-
-     builder = Builder()
-
-
- __all__ = ["SparkSession"]
pyduckdb/spark/sql/streaming.py DELETED
@@ -1,37 +0,0 @@
- from typing import TYPE_CHECKING, Optional, Union
- from pyduckdb.spark.sql.types import StructType
-
- if TYPE_CHECKING:
-     from pyduckdb.spark.sql.dataframe import DataFrame
-     from pyduckdb.spark.sql.session import SparkSession
-
- PrimitiveType = Union[bool, float, int, str]
- OptionalPrimitiveType = Optional[PrimitiveType]
-
-
- class DataStreamWriter:
-     def __init__(self, dataframe: "DataFrame"):
-         self.dataframe = dataframe
-
-     def toTable(self, table_name: str) -> None:
-         # Should we register the dataframe or create a table from the contents?
-         raise NotImplementedError
-
-
- class DataStreamReader:
-     def __init__(self, session: "SparkSession"):
-         self.session = session
-
-     def load(
-         self,
-         path: Optional[str] = None,
-         format: Optional[str] = None,
-         schema: Union[StructType, str, None] = None,
-         **options: OptionalPrimitiveType
-     ) -> "DataFrame":
-         from pyduckdb.spark.sql.dataframe import DataFrame
-
-         raise NotImplementedError
-
-
- __all__ = ["DataStreamReader", "DataStreamWriter"]
pyduckdb/spark/sql/type_utils.py DELETED
@@ -1,104 +0,0 @@
- import typing
- from duckdb.typing import DuckDBPyType
- from typing import List, Tuple, cast
- from .types import (
-     DataType,
-     StringType,
-     BinaryType,
-     BitstringType,
-     UUIDType,
-     BooleanType,
-     DateType,
-     TimestampType,
-     TimestampNTZType,
-     TimeType,
-     TimeNTZType,
-     TimestampNanosecondNTZType,
-     TimestampMilisecondNTZType,
-     TimestampSecondNTZType,
-     DecimalType,
-     DoubleType,
-     FloatType,
-     ByteType,
-     UnsignedByteType,
-     ShortType,
-     UnsignedShortType,
-     IntegerType,
-     UnsignedIntegerType,
-     LongType,
-     UnsignedLongType,
-     HugeIntegerType,
-     DayTimeIntervalType,
-     ArrayType,
-     MapType,
-     StructField,
-     StructType,
- )
-
- _sqltype_to_spark_class = {
-     'boolean': BooleanType,
-     'utinyint': UnsignedByteType,
-     'tinyint': ByteType,
-     'usmallint': UnsignedShortType,
-     'smallint': ShortType,
-     'uinteger': UnsignedIntegerType,
-     'integer': IntegerType,
-     'ubigint': UnsignedLongType,
-     'bigint': LongType,
-     'hugeint': HugeIntegerType,
-     'varchar': StringType,
-     'blob': BinaryType,
-     'bit': BitstringType,
-     'uuid': UUIDType,
-     'date': DateType,
-     'time': TimeNTZType,
-     'time with time zone': TimeType,
-     'timestamp': TimestampNTZType,
-     'timestamp with time zone': TimestampType,
-     'timestamp_ms': TimestampNanosecondNTZType,
-     'timestamp_ns': TimestampMilisecondNTZType,
-     'timestamp_s': TimestampSecondNTZType,
-     'interval': DayTimeIntervalType,
-     'list': ArrayType,
-     'struct': StructType,
-     'map': MapType,
-     # union
-     # enum
-     # null (???)
-     'float': FloatType,
-     'double': DoubleType,
-     'decimal': DecimalType,
- }
-
-
- def convert_nested_type(dtype: DuckDBPyType) -> DataType:
-     id = dtype.id
-     if id == 'list':
-         children = dtype.children
-         return ArrayType(convert_type(children[0][1]))
-     # TODO: add support for 'union'
-     if id == 'struct':
-         children: List[Tuple[str, DuckDBPyType]] = dtype.children
-         fields = [StructField(x[0], convert_type(x[1])) for x in children]
-         return StructType(fields)
-     if id == 'map':
-         return MapType(convert_type(dtype.key), convert_type(dtype.value))
-     raise NotImplementedError
-
-
- def convert_type(dtype: DuckDBPyType) -> DataType:
-     id = dtype.id
-     if id in ['list', 'struct', 'map']:
-         return convert_nested_type(dtype)
-     if id == 'decimal':
-         children: List[Tuple[str, DuckDBPyType]] = dtype.children
-         precision = cast(int, children[0][1])
-         scale = cast(int, children[1][1])
-         return DecimalType(precision, scale)
-     spark_type = _sqltype_to_spark_class[id]
-     return spark_type()
-
-
- def duckdb_to_spark_schema(names: List[str], types: List[DuckDBPyType]) -> StructType:
-     fields = [StructField(name, dtype) for name, dtype in zip(names, [convert_type(x) for x in types])]
-     return StructType(fields)
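
The deleted type_utils.py translates a DuckDB schema by looking up each type id in _sqltype_to_spark_class and recursing into list, struct, and map children. A hedged usage sketch against the relocated module; the `duckdb.experimental.spark.sql.type_utils` path and the survival of `duckdb_to_spark_schema` under that name are assumptions based on the file list above:

    import duckdb
    # Assumed new home of the helper; the deleted pyduckdb module defined the same function.
    from duckdb.experimental.spark.sql.type_utils import duckdb_to_spark_schema

    conn = duckdb.connect()
    rel = conn.sql("select 42 as answer, ['a', 'b'] as tags")

    # rel.columns and rel.types expose the names and DuckDBPyType objects that the
    # helper folds into a Spark-style StructType.
    schema = duckdb_to_spark_schema(rel.columns, rel.types)
    print(schema)  # roughly StructType([answer: IntegerType, tags: ArrayType(StringType)])
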
pyduckdb/spark/sql/udf.py DELETED
@@ -1,9 +0,0 @@
- # https://sparkbyexamples.com/pyspark/pyspark-udf-user-defined-function/
-
-
- class UDFRegistration:
-     def __init__(self):
-         raise NotImplementedError
-
-
- __all__ = ["UDFRegistration"]
{pyduckdb → duckdb/experimental}/spark/LICENSE RENAMED
File without changes