clickzetta-zettapark-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. clickzetta/zettapark/__init__.py +73 -0
  2. clickzetta/zettapark/_adaptor.py +193 -0
  3. clickzetta/zettapark/_internal/__init__.py +3 -0
  4. clickzetta/zettapark/_internal/analyzer/__init__.py +3 -0
  5. clickzetta/zettapark/_internal/analyzer/analyzer.py +1196 -0
  6. clickzetta/zettapark/_internal/analyzer/analyzer_utils.py +1498 -0
  7. clickzetta/zettapark/_internal/analyzer/binary_expression.py +107 -0
  8. clickzetta/zettapark/_internal/analyzer/binary_plan_node.py +191 -0
  9. clickzetta/zettapark/_internal/analyzer/cte_utils.py +121 -0
  10. clickzetta/zettapark/_internal/analyzer/datatype_mapper.py +238 -0
  11. clickzetta/zettapark/_internal/analyzer/expression.py +421 -0
  12. clickzetta/zettapark/_internal/analyzer/grouping_set.py +38 -0
  13. clickzetta/zettapark/_internal/analyzer/schema_utils.py +175 -0
  14. clickzetta/zettapark/_internal/analyzer/select_statement.py +1244 -0
  15. clickzetta/zettapark/_internal/analyzer/snowflake_plan.py +1434 -0
  16. clickzetta/zettapark/_internal/analyzer/snowflake_plan_node.py +157 -0
  17. clickzetta/zettapark/_internal/analyzer/sort_expression.py +57 -0
  18. clickzetta/zettapark/_internal/analyzer/table_function.py +117 -0
  19. clickzetta/zettapark/_internal/analyzer/table_merge_expression.py +87 -0
  20. clickzetta/zettapark/_internal/analyzer/unary_expression.py +110 -0
  21. clickzetta/zettapark/_internal/analyzer/unary_plan_node.py +142 -0
  22. clickzetta/zettapark/_internal/analyzer/window_expression.py +138 -0
  23. clickzetta/zettapark/_internal/code_generation.py +649 -0
  24. clickzetta/zettapark/_internal/df_pandas_utils.py +342 -0
  25. clickzetta/zettapark/_internal/error_message.py +441 -0
  26. clickzetta/zettapark/_internal/packaging_utils.py +526 -0
  27. clickzetta/zettapark/_internal/server_connection.py +803 -0
  28. clickzetta/zettapark/_internal/telemetry.py +341 -0
  29. clickzetta/zettapark/_internal/type_utils.py +807 -0
  30. clickzetta/zettapark/_internal/udf_utils.py +1145 -0
  31. clickzetta/zettapark/_internal/utils.py +872 -0
  32. clickzetta/zettapark/_internal/volume_utils.py +171 -0
  33. clickzetta/zettapark/async_job.py +398 -0
  34. clickzetta/zettapark/column.py +856 -0
  35. clickzetta/zettapark/context.py +20 -0
  36. clickzetta/zettapark/dataframe.py +4012 -0
  37. clickzetta/zettapark/dataframe_analytics_functions.py +700 -0
  38. clickzetta/zettapark/dataframe_na_functions.py +579 -0
  39. clickzetta/zettapark/dataframe_reader.py +723 -0
  40. clickzetta/zettapark/dataframe_stat_functions.py +260 -0
  41. clickzetta/zettapark/dataframe_writer.py +339 -0
  42. clickzetta/zettapark/exceptions.py +278 -0
  43. clickzetta/zettapark/file_operation.py +504 -0
  44. clickzetta/zettapark/files.py +213 -0
  45. clickzetta/zettapark/functions.py +8365 -0
  46. clickzetta/zettapark/mock/__init__.py +15 -0
  47. clickzetta/zettapark/mock/_analyzer.py +776 -0
  48. clickzetta/zettapark/mock/_connection.py +657 -0
  49. clickzetta/zettapark/mock/_constants.py +12 -0
  50. clickzetta/zettapark/mock/_file_operation.py +190 -0
  51. clickzetta/zettapark/mock/_functions.py +1003 -0
  52. clickzetta/zettapark/mock/_pandas_util.py +238 -0
  53. clickzetta/zettapark/mock/_plan.py +1886 -0
  54. clickzetta/zettapark/mock/_plan_builder.py +66 -0
  55. clickzetta/zettapark/mock/_select_statement.py +499 -0
  56. clickzetta/zettapark/mock/_snowflake_data_type.py +494 -0
  57. clickzetta/zettapark/mock/_snowflake_to_pandas_converter.py +197 -0
  58. clickzetta/zettapark/mock/_util.py +219 -0
  59. clickzetta/zettapark/mock/_window_utils.py +92 -0
  60. clickzetta/zettapark/py.typed +0 -0
  61. clickzetta/zettapark/query_history.py +39 -0
  62. clickzetta/zettapark/relational_grouped_dataframe.py +502 -0
  63. clickzetta/zettapark/row.py +328 -0
  64. clickzetta/zettapark/session.py +3038 -0
  65. clickzetta/zettapark/stored_procedure.py +872 -0
  66. clickzetta/zettapark/table.py +704 -0
  67. clickzetta/zettapark/table_function.py +287 -0
  68. clickzetta/zettapark/types.py +556 -0
  69. clickzetta/zettapark/udaf.py +700 -0
  70. clickzetta/zettapark/udf.py +904 -0
  71. clickzetta/zettapark/udtf.py +953 -0
  72. clickzetta/zettapark/version.py +7 -0
  73. clickzetta/zettapark/window.py +267 -0
  74. clickzetta_zettapark_python-0.1.0-py3.8-nspkg.pth +1 -0
  75. clickzetta_zettapark_python-0.1.0.dist-info/LICENSE +201 -0
  76. clickzetta_zettapark_python-0.1.0.dist-info/LICENSE.txt +202 -0
  77. clickzetta_zettapark_python-0.1.0.dist-info/METADATA +62 -0
  78. clickzetta_zettapark_python-0.1.0.dist-info/RECORD +81 -0
  79. clickzetta_zettapark_python-0.1.0.dist-info/WHEEL +5 -0
  80. clickzetta_zettapark_python-0.1.0.dist-info/namespace_packages.txt +1 -0
  81. clickzetta_zettapark_python-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ # Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved.
4
+ #
5
+
6
+ """
7
+ Contains core classes of Zettapark.
8
+ """
9
+
10
+ # types, udf, functions, exceptions still use its own modules
11
+
12
# Names exported by ``from clickzetta.zettapark import *``.
__all__ = [
    "Column",
    "CaseExpr",
    "Row",
    "Session",
    "FileOperation",
    "PutResult",
    "GetResult",
    "DataFrame",
    "DataFrameStatFunctions",
    "DataFrameAnalyticsFunctions",
    "DataFrameNaFunctions",
    "DataFrameWriter",
    "DataFrameReader",
    "GroupingSets",
    "RelationalGroupedDataFrame",
    "Window",
    "WindowSpec",
    "Table",
    "UpdateResult",
    "DeleteResult",
    "MergeResult",
    "WhenMatchedClause",
    "WhenNotMatchedClause",
    "QueryRecord",
    "QueryHistory",
    "AsyncJob",
]


from clickzetta.zettapark.version import VERSION

# Dotted version string built from the entries of VERSION, skipping None entries.
_version_parts = [str(part) for part in VERSION if part is not None]
__version__ = ".".join(_version_parts)
45
+
46
+
47
+ from clickzetta.zettapark.async_job import AsyncJob
48
+ from clickzetta.zettapark.column import CaseExpr, Column
49
+ from clickzetta.zettapark.dataframe import DataFrame
50
+ from clickzetta.zettapark.dataframe_analytics_functions import (
51
+ DataFrameAnalyticsFunctions,
52
+ )
53
+ from clickzetta.zettapark.dataframe_na_functions import DataFrameNaFunctions
54
+ from clickzetta.zettapark.dataframe_reader import DataFrameReader
55
+ from clickzetta.zettapark.dataframe_stat_functions import DataFrameStatFunctions
56
+ from clickzetta.zettapark.dataframe_writer import DataFrameWriter
57
+ from clickzetta.zettapark.file_operation import FileOperation, GetResult, PutResult
58
+ from clickzetta.zettapark.query_history import QueryHistory, QueryRecord
59
+ from clickzetta.zettapark.relational_grouped_dataframe import (
60
+ GroupingSets,
61
+ RelationalGroupedDataFrame,
62
+ )
63
+ from clickzetta.zettapark.row import Row
64
+ from clickzetta.zettapark.session import Session
65
+ from clickzetta.zettapark.table import (
66
+ DeleteResult,
67
+ MergeResult,
68
+ Table,
69
+ UpdateResult,
70
+ WhenMatchedClause,
71
+ WhenNotMatchedClause,
72
+ )
73
+ from clickzetta.zettapark.window import Window, WindowSpec
@@ -0,0 +1,193 @@
1
+ #
2
+ # Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved.
3
+ #
4
+
5
+ import functools
6
+ import io
7
+ from logging import getLogger
8
+ from typing import IO, Dict, Optional, Sequence
9
+
10
+ import requests
11
+ from clickzetta.connector.v0.connection import (
12
+ Connection as _ConnectorConnection,
13
+ connect as _connector_connect,
14
+ )
15
+ from clickzetta.connector.v0.cursor import Cursor as _ConnectorCursor
16
+ from clickzetta.connector.v0.exceptions import ProgrammingError
17
+
18
+ from clickzetta.zettapark._internal.volume_utils import _parse_volume_uri
19
+ from clickzetta.zettapark.exceptions import (
20
+ ZettaparkSQLException,
21
+ ZettaparkUploadFileException,
22
+ )
23
+
24
+ _logger = getLogger(__name__)
25
+
26
+
27
def _raise_on_closed(exc_msg, exc_class=ProgrammingError, closed_attr_name="_closed"):
    """Build a class decorator that guards public methods against use after close.

    Each public callable reported by ``dir()`` on the decorated class (plus
    ``__iter__``) is replaced with a wrapper.  The wrapper raises
    ``exc_class(exc_msg)`` when the instance attribute named
    ``closed_attr_name`` is truthy and otherwise delegates to the original
    callable.  ``is_closed``, every other underscore-prefixed name,
    non-callable attributes, static methods and class methods are left alone.

    Args:
        exc_msg: Message passed to ``exc_class`` when the instance is closed.
        exc_class: Exception type raised by the guard.
        closed_attr_name: Name of the instance attribute holding the closed flag.

    Returns:
        A decorator that patches its argument in place and returns it.
    """
    # Imported locally so this helper does not depend on a file-level import.
    import inspect

    def _wrap_method(method):
        def _wrapper(self, *args, **kwargs):
            if getattr(self, closed_attr_name):
                raise exc_class(exc_msg)
            return method(self, *args, **kwargs)

        functools.update_wrapper(_wrapper, method)
        return _wrapper

    def _wrap_class(class_):
        for name in dir(class_):
            if name == "is_closed":
                continue
            if name.startswith("_") and name != "__iter__":
                continue
            member = getattr(class_, name)
            if not callable(member):
                continue
            # Static/class methods must be detected on the raw descriptor, not
            # on ``member`` (which is already the callable they produce).
            # ``class_.__dict__[name]`` raised KeyError for members inherited
            # from a base class; getattr_static resolves the raw attribute
            # along the MRO without invoking descriptors.
            raw_member = inspect.getattr_static(class_, name)
            if isinstance(raw_member, (staticmethod, classmethod)):
                continue
            setattr(class_, name, _wrap_method(member))

        return class_

    return _wrap_class
54
+
55
+
56
# Translates the object-type / parameter names accepted by this adaptor into
# the attribute names looked up on the connector client.
_OBJECT_TYPE_ALIAS = {
    "database": "workspace",
    "warehouse": "vcluster",
}
57
+
58
+
59
class ClickzettaCursor(_ConnectorCursor):
    """Connector cursor exposing ``execute``/``executemany`` with ``parameters`` arguments.

    The ``parameters`` argument of each method is forwarded to the connector
    cursor under the keyword name that cursor uses.
    """

    def __init__(self, connection) -> None:
        super().__init__(connection)
        # When True, executemany() swallows failures and records placeholder
        # query metadata instead of raising.
        self._ignore_executemany_error = False

    def execute(self, operation, parameters=None):
        """Execute one statement, forwarding ``parameters`` as ``binding_params``.

        Raises:
            TypeError: Propagated unchanged from the connector cursor.
            ZettaparkSQLException: For any other ``Exception``; carries
                ``str()`` of the original error, which is chained as the cause.
        """
        try:
            return super().execute(operation, binding_params=parameters)
        except TypeError:
            raise
        except Exception as exc:
            raise ZettaparkSQLException(str(exc)) from exc

    def executemany(self, operation: str, parameters: Sequence):
        """Execute a statement for a sequence of parameters (``seqparams``).

        When ``_ignore_executemany_error`` is set, a failure is swallowed:
        ``query`` is set to ``operation``, ``job_id`` to a placeholder, and
        ``None`` is returned.  Only ``Exception`` subclasses are swallowed, so
        ``KeyboardInterrupt`` and ``SystemExit`` always propagate.
        """
        try:
            return super().executemany(operation, seqparams=parameters)
        except Exception:
            if not self._ignore_executemany_error:
                raise
        self.query = operation
        self.job_id = "fake_id"
80
+
81
+
82
class ByteCountingStream(io.IOBase):
    """Read-through wrapper that tallies how many bytes were read from a stream.

    ``read`` and ``readinto`` delegate to the wrapped stream and add the
    amount read to ``bytes_read``.  Attributes not found through normal
    lookup are fetched from the wrapped stream.
    """

    def __init__(self, input_stream) -> None:
        self.input_stream = input_stream
        self.bytes_read = 0

    def read(self, size=-1):
        """Read from the wrapped stream and count the returned bytes."""
        data = self.input_stream.read(size)
        # Raw non-blocking streams may return None when no data is available.
        if data is not None:
            self.bytes_read += len(data)
        return data

    def readinto(self, b):
        """Fill ``b`` from the wrapped stream and count the bytes written."""
        count = self.input_stream.readinto(b)
        # Same None convention as read() for non-blocking streams.
        if count is not None:
            self.bytes_read += count
        return count

    def __getattr__(self, name):
        # __getattr__ only runs after normal lookup fails.  If input_stream
        # itself is missing (instance created without __init__, e.g. during
        # copy/unpickle), delegating would recurse without bound.
        if name == "input_stream":
            raise AttributeError(name)
        return getattr(self.input_stream, name)
99
+
100
+
101
class ClickzettaConnection(_ConnectorConnection):
    """Connector connection extended with the helpers defined below.

    Adds object/parameter access on the underlying client, an idempotent
    ``close()``, cursors of type ``ClickzettaCursor`` and a stream upload to
    a volume location via a presigned URL.
    """

    def __init__(self, client=None) -> None:
        super().__init__(client)
        self._session_parameters = {}
        self._client_prefetch_threads = 1

    def use_object(self, object_name: str, object_type: str) -> None:
        """Set the client attribute for ``object_type`` to ``object_name``.

        ``object_type`` is first translated through ``_OBJECT_TYPE_ALIAS``.
        """
        attr_name = _OBJECT_TYPE_ALIAS.get(object_type, object_type)
        # Read the attribute first so that an unknown object type fails with
        # AttributeError instead of silently creating a new attribute.
        getattr(self._client, attr_name)
        setattr(self._client, attr_name, object_name)

    def get_current_parameter(self, param: str) -> Optional[str]:
        """Return the client attribute for ``param`` (after alias translation)."""
        attr_name = _OBJECT_TYPE_ALIAS.get(param, param)
        return getattr(self._client, attr_name)

    def is_closed(self) -> bool:
        """Return the connection's ``_closed`` flag."""
        return self._closed

    # Zettapark requires close() safe to be called multiple times
    def close(self):
        """Close the connection; a no-op when it is already closed."""
        if not self._closed:
            super().close()

    @property
    def expired(self) -> bool:
        """Same value as ``is_closed()``."""
        return self._closed

    def cursor(self):
        """Create a ``ClickzettaCursor`` and register it in ``_cursors_created``."""
        # TODO(guantao.gao) pass a cursor factory to _ConnectorConnection for creating cursor
        # in order to decouple from connector
        client = self._client
        # The token is refreshed only when both username and password are set.
        if client.username is not None and client.password is not None:
            client.refresh_token()
        new_cursor = ClickzettaCursor(self)
        self._cursors_created.add(new_cursor)
        return new_cursor

    def upload_stream(
        self,
        input_stream: IO[bytes],
        volume_location: str,
        *,
        parallel: int = 4,
        compress_data=False,
        source_compression: Optional[str] = None,
        overwrite=False,
    ) -> Dict:
        """Upload ``input_stream`` to a file path in a volume via a presigned URL.

        Args:
            input_stream: Binary stream whose content is sent as the request body.
            volume_location: Volume URI of the target file; must not end with "/".
            parallel: Accepted but not used by this implementation.
            compress_data: Accepted but not used by this implementation.
            source_compression: Accepted but not used by this implementation.
            overwrite: Accepted but not used by this implementation.

        Returns:
            A dict describing the upload.  ``source_size`` and ``target_size``
            both hold the number of bytes read from ``input_stream``.

        Raises:
            ValueError: If ``volume_location`` cannot be parsed or names a
                directory-like path.
            ZettaparkUploadFileException: If the HTTP request times out, fails,
                or returns a status code of 400 or above.  Request failures are
                chained as the cause.  ``KeyboardInterrupt`` and ``SystemExit``
                are not converted.
        """
        uri = _parse_volume_uri(volume_location)
        if not uri:
            raise ValueError(f"Invalid volume URI: {volume_location}")
        if uri.path.endswith("/"):
            raise ValueError(f"Invalid volume URI: not a file path: {volume_location}")

        # NOTE(review): the volume identifier and path are interpolated
        # directly into the SQL text; a path containing a single quote would
        # alter the statement. Escape or bind these values once the dialect's
        # quoting rules are confirmed.
        with self.cursor() as cursor:
            cursor.execute(
                f"SELECT get_presigned_url({uri.volume_identifier}, '{uri.path}', 1800, 'PUT')",
            )
            put_url = cursor.fetchone()[0]

        # a workaround: rewrite the "-internal" Aliyun host to its
        # non-internal form (presumably so it is reachable from outside the
        # provider's network - confirm).
        if "-internal.aliyuncs.com/" in put_url:
            put_url = put_url.replace("-internal.aliyuncs.com/", ".aliyuncs.com/")

        counted_stream = ByteCountingStream(input_stream)
        # Presigned URLs typically carry their signature in the query string,
        # so only the part before "?" is logged.
        _logger.debug("Uploading to %s...", put_url.split("?", 1)[0])
        try:
            response = requests.put(
                put_url, data=counted_stream, headers={}, timeout=60
            )
        except requests.exceptions.Timeout as exc:
            raise ZettaparkUploadFileException(
                f"Failed to upload file to {volume_location}: Timeout"
            ) from exc
        except Exception as exc:
            raise ZettaparkUploadFileException(
                f"Failed to upload file to {volume_location}: {str(exc)}"
            ) from exc

        if response.status_code >= 400:
            raise ZettaparkUploadFileException(
                f"Failed to upload file to {volume_location}: {response.text}"
            )

        uploaded_bytes = counted_stream.bytes_read
        return {
            "source": "-",
            "target": uri.path,
            "source_size": uploaded_bytes,
            "target_size": uploaded_bytes,
            "source_compression": "NONE",
            "target_compression": "NONE",
            "status": "UPLOADED",
            "message": "",
        }
187
+
188
+
189
@_raise_on_closed("Operating on a closed connection")
def clickzetta_connect(**kwargs) -> ClickzettaConnection:
    """Open a connector connection and re-wrap its client.

    All keyword arguments are forwarded to the connector's ``connect``; the
    resulting connection's ``_client`` is handed to a new
    ``ClickzettaConnection``.

    NOTE(review): ``_raise_on_closed`` patches public callables found via
    ``dir()``; applied to a plain function it appears to change nothing.
    Confirm whether it was meant to decorate ``ClickzettaConnection``.
    """
    return ClickzettaConnection(_connector_connect(**kwargs)._client)
@@ -0,0 +1,3 @@
1
+ #
2
+ # Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved.
3
+ #
@@ -0,0 +1,3 @@
1
+ #
2
+ # Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved.
3
+ #