clickzetta-zettapark-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clickzetta/zettapark/__init__.py +73 -0
- clickzetta/zettapark/_adaptor.py +193 -0
- clickzetta/zettapark/_internal/__init__.py +3 -0
- clickzetta/zettapark/_internal/analyzer/__init__.py +3 -0
- clickzetta/zettapark/_internal/analyzer/analyzer.py +1196 -0
- clickzetta/zettapark/_internal/analyzer/analyzer_utils.py +1498 -0
- clickzetta/zettapark/_internal/analyzer/binary_expression.py +107 -0
- clickzetta/zettapark/_internal/analyzer/binary_plan_node.py +191 -0
- clickzetta/zettapark/_internal/analyzer/cte_utils.py +121 -0
- clickzetta/zettapark/_internal/analyzer/datatype_mapper.py +238 -0
- clickzetta/zettapark/_internal/analyzer/expression.py +421 -0
- clickzetta/zettapark/_internal/analyzer/grouping_set.py +38 -0
- clickzetta/zettapark/_internal/analyzer/schema_utils.py +175 -0
- clickzetta/zettapark/_internal/analyzer/select_statement.py +1244 -0
- clickzetta/zettapark/_internal/analyzer/snowflake_plan.py +1434 -0
- clickzetta/zettapark/_internal/analyzer/snowflake_plan_node.py +157 -0
- clickzetta/zettapark/_internal/analyzer/sort_expression.py +57 -0
- clickzetta/zettapark/_internal/analyzer/table_function.py +117 -0
- clickzetta/zettapark/_internal/analyzer/table_merge_expression.py +87 -0
- clickzetta/zettapark/_internal/analyzer/unary_expression.py +110 -0
- clickzetta/zettapark/_internal/analyzer/unary_plan_node.py +142 -0
- clickzetta/zettapark/_internal/analyzer/window_expression.py +138 -0
- clickzetta/zettapark/_internal/code_generation.py +649 -0
- clickzetta/zettapark/_internal/df_pandas_utils.py +342 -0
- clickzetta/zettapark/_internal/error_message.py +441 -0
- clickzetta/zettapark/_internal/packaging_utils.py +526 -0
- clickzetta/zettapark/_internal/server_connection.py +803 -0
- clickzetta/zettapark/_internal/telemetry.py +341 -0
- clickzetta/zettapark/_internal/type_utils.py +807 -0
- clickzetta/zettapark/_internal/udf_utils.py +1145 -0
- clickzetta/zettapark/_internal/utils.py +872 -0
- clickzetta/zettapark/_internal/volume_utils.py +171 -0
- clickzetta/zettapark/async_job.py +398 -0
- clickzetta/zettapark/column.py +856 -0
- clickzetta/zettapark/context.py +20 -0
- clickzetta/zettapark/dataframe.py +4012 -0
- clickzetta/zettapark/dataframe_analytics_functions.py +700 -0
- clickzetta/zettapark/dataframe_na_functions.py +579 -0
- clickzetta/zettapark/dataframe_reader.py +723 -0
- clickzetta/zettapark/dataframe_stat_functions.py +260 -0
- clickzetta/zettapark/dataframe_writer.py +339 -0
- clickzetta/zettapark/exceptions.py +278 -0
- clickzetta/zettapark/file_operation.py +504 -0
- clickzetta/zettapark/files.py +213 -0
- clickzetta/zettapark/functions.py +8365 -0
- clickzetta/zettapark/mock/__init__.py +15 -0
- clickzetta/zettapark/mock/_analyzer.py +776 -0
- clickzetta/zettapark/mock/_connection.py +657 -0
- clickzetta/zettapark/mock/_constants.py +12 -0
- clickzetta/zettapark/mock/_file_operation.py +190 -0
- clickzetta/zettapark/mock/_functions.py +1003 -0
- clickzetta/zettapark/mock/_pandas_util.py +238 -0
- clickzetta/zettapark/mock/_plan.py +1886 -0
- clickzetta/zettapark/mock/_plan_builder.py +66 -0
- clickzetta/zettapark/mock/_select_statement.py +499 -0
- clickzetta/zettapark/mock/_snowflake_data_type.py +494 -0
- clickzetta/zettapark/mock/_snowflake_to_pandas_converter.py +197 -0
- clickzetta/zettapark/mock/_util.py +219 -0
- clickzetta/zettapark/mock/_window_utils.py +92 -0
- clickzetta/zettapark/py.typed +0 -0
- clickzetta/zettapark/query_history.py +39 -0
- clickzetta/zettapark/relational_grouped_dataframe.py +502 -0
- clickzetta/zettapark/row.py +328 -0
- clickzetta/zettapark/session.py +3038 -0
- clickzetta/zettapark/stored_procedure.py +872 -0
- clickzetta/zettapark/table.py +704 -0
- clickzetta/zettapark/table_function.py +287 -0
- clickzetta/zettapark/types.py +556 -0
- clickzetta/zettapark/udaf.py +700 -0
- clickzetta/zettapark/udf.py +904 -0
- clickzetta/zettapark/udtf.py +953 -0
- clickzetta/zettapark/version.py +7 -0
- clickzetta/zettapark/window.py +267 -0
- clickzetta_zettapark_python-0.1.0-py3.8-nspkg.pth +1 -0
- clickzetta_zettapark_python-0.1.0.dist-info/LICENSE +201 -0
- clickzetta_zettapark_python-0.1.0.dist-info/LICENSE.txt +202 -0
- clickzetta_zettapark_python-0.1.0.dist-info/METADATA +62 -0
- clickzetta_zettapark_python-0.1.0.dist-info/RECORD +81 -0
- clickzetta_zettapark_python-0.1.0.dist-info/WHEEL +5 -0
- clickzetta_zettapark_python-0.1.0.dist-info/namespace_packages.txt +1 -0
- clickzetta_zettapark_python-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
# Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
|
|
6
|
+
"""
|
|
7
|
+
Contains core classes of Zettapark.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
# types, udf, functions, and exceptions still use their own modules
|
|
11
|
+
|
|
12
|
+
# Public names re-exported from the package root, grouped by defining module.
__all__ = [
    # column
    "Column", "CaseExpr",
    # row / session
    "Row", "Session",
    # file_operation
    "FileOperation", "PutResult", "GetResult",
    # dataframe and its helper namespaces
    "DataFrame",
    "DataFrameStatFunctions",
    "DataFrameAnalyticsFunctions",
    "DataFrameNaFunctions",
    "DataFrameWriter",
    "DataFrameReader",
    # relational_grouped_dataframe
    "GroupingSets", "RelationalGroupedDataFrame",
    # window
    "Window", "WindowSpec",
    # table
    "Table",
    "UpdateResult", "DeleteResult", "MergeResult",
    "WhenMatchedClause", "WhenNotMatchedClause",
    # query_history
    "QueryRecord", "QueryHistory",
    # async_job
    "AsyncJob",
]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
from clickzetta.zettapark.version import VERSION
|
|
43
|
+
|
|
44
|
+
# Dotted version string built from the components of VERSION; None components are omitted.
__version__ = ".".join(str(component) for component in VERSION if component is not None)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
from clickzetta.zettapark.async_job import AsyncJob
|
|
48
|
+
from clickzetta.zettapark.column import CaseExpr, Column
|
|
49
|
+
from clickzetta.zettapark.dataframe import DataFrame
|
|
50
|
+
from clickzetta.zettapark.dataframe_analytics_functions import (
|
|
51
|
+
DataFrameAnalyticsFunctions,
|
|
52
|
+
)
|
|
53
|
+
from clickzetta.zettapark.dataframe_na_functions import DataFrameNaFunctions
|
|
54
|
+
from clickzetta.zettapark.dataframe_reader import DataFrameReader
|
|
55
|
+
from clickzetta.zettapark.dataframe_stat_functions import DataFrameStatFunctions
|
|
56
|
+
from clickzetta.zettapark.dataframe_writer import DataFrameWriter
|
|
57
|
+
from clickzetta.zettapark.file_operation import FileOperation, GetResult, PutResult
|
|
58
|
+
from clickzetta.zettapark.query_history import QueryHistory, QueryRecord
|
|
59
|
+
from clickzetta.zettapark.relational_grouped_dataframe import (
|
|
60
|
+
GroupingSets,
|
|
61
|
+
RelationalGroupedDataFrame,
|
|
62
|
+
)
|
|
63
|
+
from clickzetta.zettapark.row import Row
|
|
64
|
+
from clickzetta.zettapark.session import Session
|
|
65
|
+
from clickzetta.zettapark.table import (
|
|
66
|
+
DeleteResult,
|
|
67
|
+
MergeResult,
|
|
68
|
+
Table,
|
|
69
|
+
UpdateResult,
|
|
70
|
+
WhenMatchedClause,
|
|
71
|
+
WhenNotMatchedClause,
|
|
72
|
+
)
|
|
73
|
+
from clickzetta.zettapark.window import Window, WindowSpec
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
import functools
|
|
6
|
+
import io
|
|
7
|
+
from logging import getLogger
|
|
8
|
+
from typing import IO, Dict, Optional, Sequence
|
|
9
|
+
|
|
10
|
+
import requests
|
|
11
|
+
from clickzetta.connector.v0.connection import (
|
|
12
|
+
Connection as _ConnectorConnection,
|
|
13
|
+
connect as _connector_connect,
|
|
14
|
+
)
|
|
15
|
+
from clickzetta.connector.v0.cursor import Cursor as _ConnectorCursor
|
|
16
|
+
from clickzetta.connector.v0.exceptions import ProgrammingError
|
|
17
|
+
|
|
18
|
+
from clickzetta.zettapark._internal.volume_utils import _parse_volume_uri
|
|
19
|
+
from clickzetta.zettapark.exceptions import (
|
|
20
|
+
ZettaparkSQLException,
|
|
21
|
+
ZettaparkUploadFileException,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
_logger = getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _raise_on_closed(exc_msg, exc_class=ProgrammingError, closed_attr_name="_closed"):
    """Build a class decorator that makes public methods fail on a closed instance.

    Every public callable attribute of the decorated class (plus ``__iter__``)
    is replaced by a wrapper that checks the instance's closed flag before
    delegating to the original method. ``is_closed``, other underscore-prefixed
    names, non-callables, static methods and class methods are left untouched.

    Args:
        exc_msg: Message passed to ``exc_class`` when a guarded method is
            called on a closed instance.
        exc_class: Exception type raised for a closed instance.
        closed_attr_name: Name of the instance attribute whose truthiness
            marks the instance as closed.

    Returns:
        A decorator that rewrites the class in place and returns it.
    """

    def _wrap_method(method):
        @functools.wraps(method)
        def _wrapper(self, *args, **kwargs):
            # An instance that has not set the flag yet (for example while its
            # __init__ is still running) is treated as open rather than
            # failing with AttributeError.
            if getattr(self, closed_attr_name, False):
                raise exc_class(exc_msg)
            return method(self, *args, **kwargs)

        return _wrapper

    def _raw_attribute(class_, name):
        # dir() also reports inherited names, which are absent from
        # class_.__dict__; walk the MRO to find the defining namespace instead
        # of raising KeyError on the first inherited method.
        for base in getattr(class_, "__mro__", (class_,)):
            namespace = getattr(base, "__dict__", {})
            if name in namespace:
                return namespace[name]
        return None

    def _wrap_class(class_):
        for name in dir(class_):
            if name == "is_closed":
                continue
            if name.startswith("_") and name != "__iter__":
                continue
            member = getattr(class_, name)
            if not callable(member):
                continue
            # Static and class methods do not receive an instance, so there is
            # no closed flag to check for them.
            if isinstance(_raw_attribute(class_, name), (staticmethod, classmethod)):
                continue
            setattr(class_, name, _wrap_method(member))

        return class_

    return _wrap_class
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# Translates object-type names accepted by callers into the attribute names
# looked up on the connector client; names without an entry are used unchanged.
_OBJECT_TYPE_ALIAS = {
    "database": "workspace",
    "warehouse": "vcluster",
}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class ClickzettaCursor(_ConnectorCursor):
    """Connector cursor adapted to the call signatures Zettapark uses.

    ``execute`` and ``executemany`` accept a ``parameters`` argument and
    forward it to the connector under the keyword names it expects
    (``binding_params`` and ``seqparams`` respectively).
    """

    def __init__(self, connection) -> None:
        super().__init__(connection)
        # When True, executemany() swallows failures instead of raising.
        self._ignore_executemany_error = False

    def execute(self, operation, parameters=None):
        """Run ``operation`` and return whatever the connector's ``execute`` returns.

        Raises:
            TypeError: Propagated unchanged.
            ZettaparkSQLException: For any other failure; the original
                exception is attached as ``__cause__``.
        """
        try:
            return super().execute(operation, binding_params=parameters)
        except TypeError:
            raise
        except Exception as exc:
            # Chain the cause so the connector's traceback is not lost.
            raise ZettaparkSQLException(str(exc)) from exc

    def executemany(self, operation: str, parameters: Sequence):
        """Run ``operation`` once per entry of ``parameters``.

        When ``_ignore_executemany_error`` is set, a failure is swallowed: the
        cursor records ``operation`` as its query together with a placeholder
        job id, and ``None`` is returned.
        """
        try:
            return super().executemany(operation, seqparams=parameters)
        except Exception:
            # Only ordinary errors are eligible for suppression;
            # KeyboardInterrupt and SystemExit must always propagate.
            if not self._ignore_executemany_error:
                raise
            self.query = operation
            self.job_id = "fake_id"
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class ByteCountingStream(io.IOBase):
    """Wrap a readable stream and count the bytes read through the wrapper.

    ``bytes_read`` accumulates the amount of data returned by ``read`` and
    ``readinto``. Attributes that are defined neither here nor on
    ``io.IOBase`` are delegated to the wrapped stream.
    """

    def __init__(self, input_stream) -> None:
        self.input_stream = input_stream
        self.bytes_read = 0

    def read(self, size=-1):
        """Read from the wrapped stream and add the result's length to ``bytes_read``."""
        data = self.input_stream.read(size)
        # A non-blocking raw stream may return None when no data is available;
        # there is nothing to count in that case.
        if data:
            self.bytes_read += len(data)
        return data

    def readinto(self, b):
        """Fill ``b`` from the wrapped stream and add the byte count to ``bytes_read``."""
        count = self.input_stream.readinto(b)
        # readinto() may likewise return None on a non-blocking stream.
        if count:
            self.bytes_read += count
        return count

    def __getattr__(self, name):
        # __getattr__ only runs when normal lookup fails. If ``input_stream``
        # itself is missing (instance created without running __init__),
        # delegating would recurse without end, so fail with AttributeError.
        if name == "input_stream":
            raise AttributeError(name)
        return getattr(self.input_stream, name)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class ClickzettaConnection(_ConnectorConnection):
    """Connector connection extended with the operations Zettapark relies on.

    Adds object switching, lookup of the current value of a client attribute,
    an idempotent ``close()``, cursors of type ``ClickzettaCursor`` and
    stream upload to a volume through a presigned URL.
    """

    def __init__(self, client=None) -> None:
        super().__init__(client)
        # NOTE(review): neither attribute is read inside this class; presumably
        # they exist for callers elsewhere in the package - confirm.
        self._session_parameters = {}
        self._client_prefetch_threads = 1

    def use_object(self, object_name: str, object_type: str) -> None:
        """Set the client attribute for ``object_type`` to ``object_name``.

        ``object_type`` is first translated through ``_OBJECT_TYPE_ALIAS``.
        """
        object_type = _OBJECT_TYPE_ALIAS.get(object_type, object_type)
        # Read the attribute first so that an object type the client does not
        # know fails here instead of silently creating a new attribute.
        getattr(self._client, object_type)
        setattr(self._client, object_type, object_name)

    def get_current_parameter(self, param: str) -> Optional[str]:
        """Return the client attribute named by ``param`` after alias translation."""
        param = _OBJECT_TYPE_ALIAS.get(param, param)
        return getattr(self._client, param)

    def is_closed(self) -> bool:
        """Return the connection's ``_closed`` flag."""
        return self._closed

    # Zettapark requires close() to be safe to call multiple times
    def close(self):
        """Close the connection; calling it again on a closed connection does nothing."""
        if not self._closed:
            super().close()

    @property
    def expired(self) -> bool:
        """Whether the connection is closed (same flag as ``is_closed()``)."""
        return self._closed

    def cursor(self):
        """Create, register and return a new ``ClickzettaCursor``."""
        # TODO(guantao.gao) pass a cursor factory to _ConnectorConnection for creating cursor
        # in order to decouple from connector
        if self._client.username is not None and self._client.password is not None:
            self._client.refresh_token()
        new_cursor = ClickzettaCursor(self)
        self._cursors_created.add(new_cursor)
        return new_cursor

    def upload_stream(
        self,
        input_stream: IO[bytes],
        volume_location: str,
        *,
        parallel: int = 4,
        compress_data=False,
        source_compression: Optional[str] = None,
        overwrite=False,
    ) -> Dict:
        """Upload ``input_stream`` to a file in a volume via a presigned PUT URL.

        Args:
            input_stream: Stream whose content is sent as the request body.
            volume_location: Volume URI of the target file; it must not end
                with ``/``.
            parallel: Accepted but not used by this implementation.
            compress_data: Accepted but not used by this implementation.
            source_compression: Accepted but not used by this implementation.
            overwrite: Accepted but not used by this implementation.

        Returns:
            A dict describing the upload, with the keys ``source``, ``target``,
            ``source_size``, ``target_size``, ``source_compression``,
            ``target_compression``, ``status`` and ``message``.

        Raises:
            ValueError: If ``volume_location`` cannot be parsed or names a
                directory rather than a file.
            ZettaparkUploadFileException: If no presigned URL is returned, the
                HTTP request fails or times out, or the response status is
                400 or above.
        """
        uri = _parse_volume_uri(volume_location)
        if not uri:
            raise ValueError(f"Invalid volume URI: {volume_location}")
        if uri.path.endswith("/"):
            raise ValueError(f"Invalid volume URI: not a file path: {volume_location}")

        with self.cursor() as cursor:
            # NOTE(review): the statement is assembled by string interpolation;
            # a path containing a single quote breaks (or alters) the SQL.
            # Confirm the engine's quoting rules and escape or bind the value.
            # NOTE(review): 1800 is presumably the URL lifetime in seconds - confirm.
            cursor.execute(
                f"SELECT get_presigned_url({uri.volume_identifier}, '{uri.path}', 1800, 'PUT')",
            )
            row = cursor.fetchone()
            if row is None or not row[0]:
                raise ZettaparkUploadFileException(
                    f"Failed to upload file to {volume_location}: no presigned URL returned"
                )
            put_url = row[0]
            # a workaround: swap the "-internal" Aliyun endpoint for the regular one
            # NOTE(review): presumably because the internal endpoint is not
            # reachable from where the client runs - confirm.
            put_url = put_url.replace("-internal.aliyuncs.com/", ".aliyuncs.com/")
            input_stream = ByteCountingStream(input_stream)
            # Log the URL without its query string, which for a presigned URL
            # presumably carries the signature and should stay out of logs.
            _logger.debug("Uploading to %s...", put_url.split("?", 1)[0])
            try:
                r = requests.put(put_url, data=input_stream, headers={}, timeout=60)
            except requests.exceptions.Timeout as exc:
                raise ZettaparkUploadFileException(
                    f"Failed to upload file to {volume_location}: Timeout"
                ) from exc
            except Exception as exc:
                # Deliberately not BaseException: KeyboardInterrupt and
                # SystemExit must not be reported as upload failures.
                raise ZettaparkUploadFileException(
                    f"Failed to upload file to {volume_location}: {str(exc)}"
                ) from exc
            if r.status_code >= 400:
                raise ZettaparkUploadFileException(
                    f"Failed to upload file to {volume_location}: {r.text}"
                )
            return {
                "source": "-",
                "target": uri.path,
                "source_size": input_stream.bytes_read,
                "target_size": input_stream.bytes_read,
                "source_compression": "NONE",
                "target_compression": "NONE",
                "status": "UPLOADED",
                "message": "",
            }
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@_raise_on_closed("Operating on a closed connection")
def clickzetta_connect(**kwargs) -> ClickzettaConnection:
    """Open a connector connection and re-wrap its client as a ``ClickzettaConnection``.

    All keyword arguments are forwarded unchanged to the connector's ``connect``.
    """
    # NOTE(review): the decorator above appears to have no effect on a plain
    # function, since it only wraps public callable attributes and a function
    # exposes none - confirm whether it was meant for ClickzettaConnection.
    base_connection = _connector_connect(**kwargs)
    # Only the client is carried over; the connection object created by the
    # connector is not used further.
    return ClickzettaConnection(base_connection._client)
|