duckdb-1.5.0.dev56-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of duckdb might be problematic.

Files changed (52)
  1. _duckdb-stubs/__init__.pyi +1443 -0
  2. _duckdb-stubs/_func.pyi +46 -0
  3. _duckdb-stubs/_sqltypes.pyi +75 -0
  4. _duckdb.cpython-314-x86_64-linux-gnu.so +0 -0
  5. adbc_driver_duckdb/__init__.py +50 -0
  6. adbc_driver_duckdb/dbapi.py +115 -0
  7. duckdb/__init__.py +381 -0
  8. duckdb/_dbapi_type_object.py +231 -0
  9. duckdb/_version.py +22 -0
  10. duckdb/bytes_io_wrapper.py +69 -0
  11. duckdb/experimental/__init__.py +3 -0
  12. duckdb/experimental/spark/LICENSE +260 -0
  13. duckdb/experimental/spark/__init__.py +6 -0
  14. duckdb/experimental/spark/_globals.py +77 -0
  15. duckdb/experimental/spark/_typing.py +46 -0
  16. duckdb/experimental/spark/conf.py +46 -0
  17. duckdb/experimental/spark/context.py +180 -0
  18. duckdb/experimental/spark/errors/__init__.py +70 -0
  19. duckdb/experimental/spark/errors/error_classes.py +918 -0
  20. duckdb/experimental/spark/errors/exceptions/__init__.py +16 -0
  21. duckdb/experimental/spark/errors/exceptions/base.py +168 -0
  22. duckdb/experimental/spark/errors/utils.py +111 -0
  23. duckdb/experimental/spark/exception.py +18 -0
  24. duckdb/experimental/spark/sql/__init__.py +7 -0
  25. duckdb/experimental/spark/sql/_typing.py +86 -0
  26. duckdb/experimental/spark/sql/catalog.py +79 -0
  27. duckdb/experimental/spark/sql/column.py +361 -0
  28. duckdb/experimental/spark/sql/conf.py +24 -0
  29. duckdb/experimental/spark/sql/dataframe.py +1389 -0
  30. duckdb/experimental/spark/sql/functions.py +6195 -0
  31. duckdb/experimental/spark/sql/group.py +424 -0
  32. duckdb/experimental/spark/sql/readwriter.py +435 -0
  33. duckdb/experimental/spark/sql/session.py +297 -0
  34. duckdb/experimental/spark/sql/streaming.py +36 -0
  35. duckdb/experimental/spark/sql/type_utils.py +107 -0
  36. duckdb/experimental/spark/sql/types.py +1239 -0
  37. duckdb/experimental/spark/sql/udf.py +37 -0
  38. duckdb/filesystem.py +33 -0
  39. duckdb/func/__init__.py +3 -0
  40. duckdb/functional/__init__.py +13 -0
  41. duckdb/polars_io.py +284 -0
  42. duckdb/py.typed +0 -0
  43. duckdb/query_graph/__main__.py +358 -0
  44. duckdb/sqltypes/__init__.py +63 -0
  45. duckdb/typing/__init__.py +71 -0
  46. duckdb/udf.py +24 -0
  47. duckdb/value/__init__.py +1 -0
  48. duckdb/value/constant/__init__.py +270 -0
  49. duckdb-1.5.0.dev56.dist-info/METADATA +87 -0
  50. duckdb-1.5.0.dev56.dist-info/RECORD +52 -0
  51. duckdb-1.5.0.dev56.dist-info/WHEEL +6 -0
  52. duckdb-1.5.0.dev56.dist-info/licenses/LICENSE +7 -0
duckdb/experimental/spark/errors/exceptions/__init__.py
@@ -0,0 +1,16 @@
+ # # noqa: D104
+ # Licensed to the Apache Software Foundation (ASF) under one or more
+ # contributor license agreements. See the NOTICE file distributed with
+ # this work for additional information regarding copyright ownership.
+ # The ASF licenses this file to You under the Apache License, Version 2.0
+ # (the "License"); you may not use this file except in compliance with
+ # the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
duckdb/experimental/spark/errors/exceptions/base.py
@@ -0,0 +1,168 @@
+ from typing import Optional, cast # noqa: D100
+
+ from ..utils import ErrorClassesReader
+
+
+ class PySparkException(Exception):
+     """Base Exception for handling errors generated from PySpark."""
+
+     def __init__( # noqa: D107
+         self,
+         message: Optional[str] = None,
+         # The error class, which decides the message format; must be one of the valid options listed in 'error_classes.py'
+         error_class: Optional[str] = None,
+         # The dictionary listing the arguments specified in the message (or the error_class)
+         message_parameters: Optional[dict[str, str]] = None,
+     ) -> None:
+         # `message` vs `error_class` & `message_parameters` are mutually exclusive.
+         assert (message is not None and (error_class is None and message_parameters is None)) or (
+             message is None and (error_class is not None and message_parameters is not None)
+         )
+
+         self.error_reader = ErrorClassesReader()
+
+         if message is None:
+             self.message = self.error_reader.get_error_message(
+                 cast("str", error_class), cast("dict[str, str]", message_parameters)
+             )
+         else:
+             self.message = message
+
+         self.error_class = error_class
+         self.message_parameters = message_parameters
+
+     def getErrorClass(self) -> Optional[str]:
+         """Returns an error class as a string.
+
+         .. versionadded:: 3.4.0
+
+         See Also:
+         --------
+         :meth:`PySparkException.getMessageParameters`
+         :meth:`PySparkException.getSqlState`
+         """
+         return self.error_class
+
+     def getMessageParameters(self) -> Optional[dict[str, str]]:
+         """Returns the message parameters as a dictionary.
+
+         .. versionadded:: 3.4.0
+
+         See Also:
+         --------
+         :meth:`PySparkException.getErrorClass`
+         :meth:`PySparkException.getSqlState`
+         """
+         return self.message_parameters
+
+     def getSqlState(self) -> None:
+         """Returns an SQLSTATE as a string.
+
+         Errors generated in Python have no SQLSTATE, so it always returns None.
+
+         .. versionadded:: 3.4.0
+
+         See Also:
+         --------
+         :meth:`PySparkException.getErrorClass`
+         :meth:`PySparkException.getMessageParameters`
+         """
+         return None
+
+     def __str__(self) -> str: # noqa: D105
+         if self.getErrorClass() is not None:
+             return f"[{self.getErrorClass()}] {self.message}"
+         else:
+             return self.message
+
+
+ class AnalysisException(PySparkException):
+     """Failed to analyze a SQL query plan."""
+
+
+ class SessionNotSameException(PySparkException):
+     """Performed the same operation on different SparkSession."""
+
+
+ class TempTableAlreadyExistsException(AnalysisException):
+     """Failed to create temp view since it already exists."""
+
+
+ class ParseException(AnalysisException):
+     """Failed to parse a SQL command."""
+
+
+ class IllegalArgumentException(PySparkException):
+     """Passed an illegal or inappropriate argument."""
+
+
+ class ArithmeticException(PySparkException):
+     """Arithmetic exception thrown from Spark with an error class."""
+
+
+ class UnsupportedOperationException(PySparkException):
+     """Unsupported operation exception thrown from Spark with an error class."""
+
+
+ class ArrayIndexOutOfBoundsException(PySparkException):
+     """Array index out of bounds exception thrown from Spark with an error class."""
+
+
+ class DateTimeException(PySparkException):
+     """Datetime exception thrown from Spark with an error class."""
+
+
+ class NumberFormatException(IllegalArgumentException):
+     """Number format exception thrown from Spark with an error class."""
+
+
+ class StreamingQueryException(PySparkException):
+     """Exception that stopped a :class:`StreamingQuery`."""
+
+
+ class QueryExecutionException(PySparkException):
+     """Failed to execute a query."""
+
+
+ class PythonException(PySparkException):
+     """Exceptions thrown from Python workers."""
+
+
+ class SparkRuntimeException(PySparkException):
+     """Runtime exception thrown from Spark with an error class."""
+
+
+ class SparkUpgradeException(PySparkException):
+     """Exception thrown because of Spark upgrade."""
+
+
+ class UnknownException(PySparkException):
+     """None of the above exceptions."""
+
+
+ class PySparkValueError(PySparkException, ValueError):
+     """Wrapper class for ValueError to support error classes."""
+
+
+ class PySparkIndexError(PySparkException, IndexError):
+     """Wrapper class for IndexError to support error classes."""
+
+
+ class PySparkTypeError(PySparkException, TypeError):
+     """Wrapper class for TypeError to support error classes."""
+
+
+ class PySparkAttributeError(PySparkException, AttributeError):
+     """Wrapper class for AttributeError to support error classes."""
+
+
+ class PySparkRuntimeError(PySparkException, RuntimeError):
+     """Wrapper class for RuntimeError to support error classes."""
+
+
+ class PySparkAssertionError(PySparkException, AssertionError):
+     """Wrapper class for AssertionError to support error classes."""
+
+
+ class PySparkNotImplementedError(PySparkException, NotImplementedError):
+     """Wrapper class for NotImplementedError to support error classes."""
duckdb/experimental/spark/errors/utils.py
@@ -0,0 +1,111 @@
+ # # noqa: D100
+ # Licensed to the Apache Software Foundation (ASF) under one or more
+ # contributor license agreements. See the NOTICE file distributed with
+ # this work for additional information regarding copyright ownership.
+ # The ASF licenses this file to You under the Apache License, Version 2.0
+ # (the "License"); you may not use this file except in compliance with
+ # the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+
+ import re
+
+ from .error_classes import ERROR_CLASSES_MAP
+
+
+ class ErrorClassesReader:
+     """A reader to load error information from error_classes.py."""
+
+     def __init__(self) -> None: # noqa: D107
+         self.error_info_map = ERROR_CLASSES_MAP
+
+     def get_error_message(self, error_class: str, message_parameters: dict[str, str]) -> str:
+         """Returns the completed error message by applying message parameters to the message template."""
+         message_template = self.get_message_template(error_class)
+         # Verify message parameters.
+         message_parameters_from_template = re.findall("<([a-zA-Z0-9_-]+)>", message_template)
+         assert set(message_parameters_from_template) == set(message_parameters), (
+             f"Undefined error message parameter for error class: {error_class}. Parameters: {message_parameters}"
+         )
+         table = str.maketrans("<>", "{}")
+
+         return message_template.translate(table).format(**message_parameters)
+
+     def get_message_template(self, error_class: str) -> str:
+         """Returns the message template for the corresponding error class from error_classes.py.
+
+         For example,
+         when the given `error_class` is "EXAMPLE_ERROR_CLASS",
+         and the corresponding error class in error_classes.py looks like the below:
+
+         .. code-block:: python
+
+             "EXAMPLE_ERROR_CLASS" : {
+                 "message" : [
+                     "Problem <A> because of <B>."
+                 ]
+             }
+
+         In this case, this function returns:
+         "Problem <A> because of <B>."
+
+         For a sub error class, when the given `error_class` is "EXAMPLE_ERROR_CLASS.SUB_ERROR_CLASS",
+         and the corresponding error class in error_classes.py looks like the below:
+
+         .. code-block:: python
+
+             "EXAMPLE_ERROR_CLASS" : {
+                 "message" : [
+                     "Problem <A> because of <B>."
+                 ],
+                 "sub_class" : {
+                     "SUB_ERROR_CLASS" : {
+                         "message" : [
+                             "Do <C> to fix the problem."
+                         ]
+                     }
+                 }
+             }
+
+         In this case, this function returns:
+         "Problem <A> because of <B>. Do <C> to fix the problem."
+         """
+         error_classes = error_class.split(".")
+         len_error_classes = len(error_classes)
+         assert len_error_classes in (1, 2)
+
+         # Generate message template for main error class.
+         main_error_class = error_classes[0]
+         if main_error_class in self.error_info_map:
+             main_error_class_info_map = self.error_info_map[main_error_class]
+         else:
+             msg = f"Cannot find main error class '{main_error_class}'"
+             raise ValueError(msg)
+
+         main_message_template = "\n".join(main_error_class_info_map["message"])
+
+         has_sub_class = len_error_classes == 2
+
+         if not has_sub_class:
+             message_template = main_message_template
+         else:
+             # Generate message template for sub error class if it exists.
+             sub_error_class = error_classes[1]
+             main_error_class_subclass_info_map = main_error_class_info_map["sub_class"]
+             if sub_error_class in main_error_class_subclass_info_map:
+                 sub_error_class_info_map = main_error_class_subclass_info_map[sub_error_class]
+             else:
+                 msg = f"Cannot find sub error class '{sub_error_class}'"
+                 raise ValueError(msg)
+
+             sub_message_template = "\n".join(sub_error_class_info_map["message"])
+             message_template = main_message_template + " " + sub_message_template
+
+         return message_template
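For orientation, here is a sketch of how the reader above expands a template. `EXAMPLE_ERROR_CLASS` is the hypothetical entry from the docstring, not a real key in `ERROR_CLASSES_MAP`; the instance map is overridden purely for illustration.

```python
from duckdb.experimental.spark.errors.utils import ErrorClassesReader

reader = ErrorClassesReader()
# Swap in the docstring's hypothetical entry (illustration only).
reader.error_info_map = {
    "EXAMPLE_ERROR_CLASS": {
        "message": ["Problem <A> because of <B>."],
        "sub_class": {
            "SUB_ERROR_CLASS": {"message": ["Do <C> to fix the problem."]},
        },
    },
}

# Main class only: the <A>/<B> placeholders become str.format fields.
print(reader.get_error_message("EXAMPLE_ERROR_CLASS", {"A": "x", "B": "y"}))
# Problem x because of y.

# Main class plus sub class: the two templates are joined with a space.
print(reader.get_error_message("EXAMPLE_ERROR_CLASS.SUB_ERROR_CLASS", {"A": "x", "B": "y", "C": "z"}))
# Problem x because of y. Do z to fix the problem.
```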
duckdb/experimental/spark/exception.py
@@ -0,0 +1,18 @@
+ # ruff: noqa: D100
+ from typing import Optional
+
+
+ class ContributionsAcceptedError(NotImplementedError):
+     """This method is not planned to be implemented. If you would like to implement this method
+     or show your interest in this method to other members of the community,
+     feel free to open up a PR or a Discussion over on https://github.com/duckdb/duckdb.
+     """ # noqa: D205
+
+     def __init__(self, message: Optional[str] = None) -> None: # noqa: D107
+         doc = self.__class__.__doc__
+         if message:
+             doc = message + "\n" + doc
+         super().__init__(doc)
+
+
+ __all__ = ["ContributionsAcceptedError"]
duckdb/experimental/spark/sql/__init__.py
@@ -0,0 +1,7 @@
+ from .catalog import Catalog # noqa: D104
+ from .conf import RuntimeConfig
+ from .dataframe import DataFrame
+ from .readwriter import DataFrameWriter
+ from .session import SparkSession
+
+ __all__ = ["Catalog", "DataFrame", "DataFrameWriter", "RuntimeConfig", "SparkSession"]
duckdb/experimental/spark/sql/_typing.py
@@ -0,0 +1,86 @@
+ #
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ # KIND, either express or implied. See the License for the
+ # specific language governing permissions and limitations
+ # under the License.
+
+ from typing import (
+     Any,
+     Callable,
+     Optional,
+     TypeVar,
+     Union,
+ )
+
+ try:
+     from typing import Literal, Protocol
+ except ImportError:
+     from typing_extensions import Literal, Protocol
+
+ import datetime
+ import decimal
+
+ from .._typing import PrimitiveType
+ from . import types
+ from .column import Column
+
+ ColumnOrName = Union[Column, str]
+ ColumnOrName_ = TypeVar("ColumnOrName_", bound=ColumnOrName)
+ DecimalLiteral = decimal.Decimal
+ DateTimeLiteral = Union[datetime.datetime, datetime.date]
+ LiteralType = PrimitiveType
+ AtomicDataTypeOrString = Union[types.AtomicType, str]
+ DataTypeOrString = Union[types.DataType, str]
+ OptionalPrimitiveType = Optional[PrimitiveType]
+
+ AtomicValue = TypeVar(
+     "AtomicValue",
+     datetime.datetime,
+     datetime.date,
+     decimal.Decimal,
+     bool,
+     str,
+     int,
+     float,
+ )
+
+ RowLike = TypeVar("RowLike", list[Any], tuple[Any, ...], types.Row)
+
+ SQLBatchedUDFType = Literal[100]
+
+
+ class SupportsOpen(Protocol):
+     def open(self, partition_id: int, epoch_id: int) -> bool: ...
+
+
+ class SupportsProcess(Protocol):
+     def process(self, row: types.Row) -> None: ...
+
+
+ class SupportsClose(Protocol):
+     def close(self, error: Exception) -> None: ...
+
+
+ class UserDefinedFunctionLike(Protocol):
+     func: Callable[..., Any]
+     evalType: int
+     deterministic: bool
+
+     @property
+     def returnType(self) -> types.DataType: ...
+
+     def __call__(self, *args: ColumnOrName) -> Column: ...
+
+     def asNondeterministic(self) -> "UserDefinedFunctionLike": ...
duckdb/experimental/spark/sql/catalog.py
@@ -0,0 +1,79 @@
+ from typing import NamedTuple, Optional, Union # noqa: D100
+
+ from .session import SparkSession
+
+
+ class Database(NamedTuple): # noqa: D101
+     name: str
+     description: Optional[str]
+     locationUri: str
+
+
+ class Table(NamedTuple): # noqa: D101
+     name: str
+     database: Optional[str]
+     description: Optional[str]
+     tableType: str
+     isTemporary: bool
+
+
+ class Column(NamedTuple): # noqa: D101
+     name: str
+     description: Optional[str]
+     dataType: str
+     nullable: bool
+     isPartition: bool
+     isBucket: bool
+
+
+ class Function(NamedTuple): # noqa: D101
+     name: str
+     description: Optional[str]
+     className: str
+     isTemporary: bool
+
+
+ class Catalog: # noqa: D101
+     def __init__(self, session: SparkSession) -> None: # noqa: D107
+         self._session = session
+
+     def listDatabases(self) -> list[Database]: # noqa: D102
+         res = self._session.conn.sql("select database_name from duckdb_databases()").fetchall()
+
+         def transform_to_database(x: list[str]) -> Database:
+             return Database(name=x[0], description=None, locationUri="")
+
+         databases = [transform_to_database(x) for x in res]
+         return databases
+
+     def listTables(self) -> list[Table]: # noqa: D102
+         res = self._session.conn.sql("select table_name, database_name, sql, temporary from duckdb_tables()").fetchall()
+
+         def transform_to_table(x: list[str]) -> Table:
+             return Table(name=x[0], database=x[1], description=x[2], tableType="", isTemporary=x[3])
+
+         tables = [transform_to_table(x) for x in res]
+         return tables
+
+     def listColumns(self, tableName: str, dbName: Optional[str] = None) -> list[Column]: # noqa: D102
+         query = f"""
+         select column_name, data_type, is_nullable from duckdb_columns() where table_name = '{tableName}'
+         """
+         if dbName:
+             query += f" and database_name = '{dbName}'"
+         res = self._session.conn.sql(query).fetchall()
+
+         def transform_to_column(x: list[Union[str, bool]]) -> Column:
+             return Column(name=x[0], description=None, dataType=x[1], nullable=x[2], isPartition=False, isBucket=False)
+
+         columns = [transform_to_column(x) for x in res]
+         return columns
+
+     def listFunctions(self, dbName: Optional[str] = None) -> list[Function]: # noqa: D102
+         raise NotImplementedError
+
+     def setCurrentDatabase(self, dbName: str) -> None: # noqa: D102
+         raise NotImplementedError
+
+
+ __all__ = ["Catalog", "Column", "Database", "Function", "Table"]