clickhouse-driver 0.2.10__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clickhouse_driver/__init__.py +9 -0
- clickhouse_driver/block.py +227 -0
- clickhouse_driver/blockstreamprofileinfo.py +22 -0
- clickhouse_driver/bufferedreader.cpython-313-darwin.so +0 -0
- clickhouse_driver/bufferedwriter.cpython-313-darwin.so +0 -0
- clickhouse_driver/client.py +812 -0
- clickhouse_driver/clientinfo.py +119 -0
- clickhouse_driver/columns/__init__.py +0 -0
- clickhouse_driver/columns/arraycolumn.py +161 -0
- clickhouse_driver/columns/base.py +221 -0
- clickhouse_driver/columns/boolcolumn.py +7 -0
- clickhouse_driver/columns/datecolumn.py +108 -0
- clickhouse_driver/columns/datetimecolumn.py +203 -0
- clickhouse_driver/columns/decimalcolumn.py +116 -0
- clickhouse_driver/columns/enumcolumn.py +129 -0
- clickhouse_driver/columns/exceptions.py +12 -0
- clickhouse_driver/columns/floatcolumn.py +34 -0
- clickhouse_driver/columns/intcolumn.py +157 -0
- clickhouse_driver/columns/intervalcolumn.py +33 -0
- clickhouse_driver/columns/ipcolumn.py +118 -0
- clickhouse_driver/columns/jsoncolumn.py +37 -0
- clickhouse_driver/columns/largeint.cpython-313-darwin.so +0 -0
- clickhouse_driver/columns/lowcardinalitycolumn.py +142 -0
- clickhouse_driver/columns/mapcolumn.py +73 -0
- clickhouse_driver/columns/nestedcolumn.py +10 -0
- clickhouse_driver/columns/nothingcolumn.py +13 -0
- clickhouse_driver/columns/nullablecolumn.py +7 -0
- clickhouse_driver/columns/nullcolumn.py +15 -0
- clickhouse_driver/columns/numpy/__init__.py +0 -0
- clickhouse_driver/columns/numpy/base.py +47 -0
- clickhouse_driver/columns/numpy/boolcolumn.py +8 -0
- clickhouse_driver/columns/numpy/datecolumn.py +19 -0
- clickhouse_driver/columns/numpy/datetimecolumn.py +146 -0
- clickhouse_driver/columns/numpy/floatcolumn.py +24 -0
- clickhouse_driver/columns/numpy/intcolumn.py +43 -0
- clickhouse_driver/columns/numpy/lowcardinalitycolumn.py +96 -0
- clickhouse_driver/columns/numpy/service.py +58 -0
- clickhouse_driver/columns/numpy/stringcolumn.py +78 -0
- clickhouse_driver/columns/numpy/tuplecolumn.py +37 -0
- clickhouse_driver/columns/service.py +185 -0
- clickhouse_driver/columns/simpleaggregatefunctioncolumn.py +7 -0
- clickhouse_driver/columns/stringcolumn.py +73 -0
- clickhouse_driver/columns/tuplecolumn.py +63 -0
- clickhouse_driver/columns/util.py +61 -0
- clickhouse_driver/columns/uuidcolumn.py +64 -0
- clickhouse_driver/compression/__init__.py +32 -0
- clickhouse_driver/compression/base.py +87 -0
- clickhouse_driver/compression/lz4.py +21 -0
- clickhouse_driver/compression/lz4hc.py +9 -0
- clickhouse_driver/compression/zstd.py +20 -0
- clickhouse_driver/connection.py +825 -0
- clickhouse_driver/context.py +36 -0
- clickhouse_driver/dbapi/__init__.py +62 -0
- clickhouse_driver/dbapi/connection.py +99 -0
- clickhouse_driver/dbapi/cursor.py +370 -0
- clickhouse_driver/dbapi/errors.py +40 -0
- clickhouse_driver/dbapi/extras.py +73 -0
- clickhouse_driver/defines.py +58 -0
- clickhouse_driver/errors.py +453 -0
- clickhouse_driver/log.py +48 -0
- clickhouse_driver/numpy/__init__.py +0 -0
- clickhouse_driver/numpy/block.py +8 -0
- clickhouse_driver/numpy/helpers.py +28 -0
- clickhouse_driver/numpy/result.py +123 -0
- clickhouse_driver/opentelemetry.py +43 -0
- clickhouse_driver/progress.py +44 -0
- clickhouse_driver/protocol.py +130 -0
- clickhouse_driver/queryprocessingstage.py +8 -0
- clickhouse_driver/reader.py +69 -0
- clickhouse_driver/readhelpers.py +26 -0
- clickhouse_driver/result.py +144 -0
- clickhouse_driver/settings/__init__.py +0 -0
- clickhouse_driver/settings/available.py +405 -0
- clickhouse_driver/settings/types.py +50 -0
- clickhouse_driver/settings/writer.py +34 -0
- clickhouse_driver/streams/__init__.py +0 -0
- clickhouse_driver/streams/compressed.py +88 -0
- clickhouse_driver/streams/native.py +108 -0
- clickhouse_driver/util/__init__.py +0 -0
- clickhouse_driver/util/compat.py +39 -0
- clickhouse_driver/util/escape.py +94 -0
- clickhouse_driver/util/helpers.py +173 -0
- clickhouse_driver/varint.cpython-313-darwin.so +0 -0
- clickhouse_driver/writer.py +67 -0
- clickhouse_driver-0.2.10.dist-info/METADATA +215 -0
- clickhouse_driver-0.2.10.dist-info/RECORD +89 -0
- clickhouse_driver-0.2.10.dist-info/WHEEL +6 -0
- clickhouse_driver-0.2.10.dist-info/licenses/LICENSE +21 -0
- clickhouse_driver-0.2.10.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,812 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from collections import deque
|
|
3
|
+
from contextlib import contextmanager
|
|
4
|
+
from time import time
|
|
5
|
+
import types
|
|
6
|
+
from urllib.parse import urlparse
|
|
7
|
+
|
|
8
|
+
from . import errors, defines
|
|
9
|
+
from .block import ColumnOrientedBlock, RowOrientedBlock
|
|
10
|
+
from .connection import Connection
|
|
11
|
+
from .log import log_block
|
|
12
|
+
from .protocol import ServerPacketTypes
|
|
13
|
+
from .result import (
|
|
14
|
+
IterQueryResult, ProgressQueryResult, QueryResult, QueryInfo
|
|
15
|
+
)
|
|
16
|
+
from .util.escape import escape_params
|
|
17
|
+
from .util.helpers import column_chunks, chunks, parse_url
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Client(object):
    """
    Client for communication with the ClickHouse server.
    Single connection is established per each connected instance of the client.

    :param settings: Dictionary of settings that passed to every query (except
                     for the client settings, see below). Defaults to ``None``
                     (no additional settings). See all available settings in
                     `ClickHouse docs
                     <https://clickhouse.com/docs/en/operations/settings/settings/>`_.
    :param \\**kwargs: All other args are passed to the
                      :py:class:`~clickhouse_driver.connection.Connection`
                      constructor.

    The following keys when passed in ``settings`` are used for configuring the
    client itself:

        * ``insert_block_size`` -- chunk size to split rows for ``INSERT``.
          Defaults to ``1048576``.
        * ``strings_as_bytes`` -- turns off string column encoding/decoding.
        * ``strings_encoding`` -- specifies string encoding. UTF-8 by default.
        * ``use_numpy`` -- Use NumPy for columns reading. New in version
          *0.2.0*.
        * ``opentelemetry_traceparent`` -- OpenTelemetry traceparent header as
          described by W3C Trace Context recommendation.
          New in version *0.2.2*.
        * ``opentelemetry_tracestate`` -- OpenTelemetry tracestate header as
          described by W3C Trace Context recommendation.
          New in version *0.2.2*.
        * ``quota_key`` -- A string to differentiate quotas when the user have
          keyed quotas configured on server.
          New in version *0.2.3*.
        * ``input_format_null_as_default`` -- Initialize null fields with
          default values if data type of this field is not
          nullable. Does not work for NumPy. Default: False.
          New in version *0.2.4*.
        * ``round_robin`` -- If ``alt_hosts`` are provided the query will be
          executed on host picked with round-robin algorithm.
          New in version *0.2.5*.
        * ``namedtuple_as_json`` -- Controls named tuple and nested types
          deserialization. To interpret these column alongside
          with ``allow_experimental_object_type=1`` as Python
          tuple set ``namedtuple_as_json`` to ``False``.
          Default: True.
          New in version *0.2.6*.
        * ``server_side_params`` -- Specifies on which side query parameters
          should be rendered into placeholders.
          Default: False. Means that parameters are rendered
          on driver's side.
          New in version *0.2.7*.
    """

    # Keys recognized as client-side settings; ``make_query_settings`` pops
    # these out of per-query settings so they are never sent to the server.
    available_client_settings = (
        'insert_block_size',  # TODO: rename to max_insert_block_size
        'strings_as_bytes',
        'strings_encoding',
        'use_numpy',
        'opentelemetry_traceparent',
        'opentelemetry_tracestate',
        'quota_key',
        'input_format_null_as_default',
        'namedtuple_as_json',
        'server_side_params'
    )
|
|
84
|
+
|
|
85
|
+
    def __init__(self, *args, **kwargs):
        """
        Initializes the client: splits ``settings`` into client-side and
        server-side parts, picks result classes (NumPy or plain), and builds
        the connection pool (with optional round-robin alternates).
        """
        # Copy so the caller's dict is never mutated by the pops below.
        self.settings = (kwargs.pop('settings', None) or {}).copy()

        # Client-only settings are popped out of ``settings``; whatever
        # remains in ``self.settings`` is forwarded to the server.
        self.client_settings = {
            'insert_block_size': int(self.settings.pop(
                'insert_block_size', defines.DEFAULT_INSERT_BLOCK_SIZE,
            )),
            'strings_as_bytes': self.settings.pop(
                'strings_as_bytes', False
            ),
            'strings_encoding': self.settings.pop(
                'strings_encoding', defines.STRINGS_ENCODING
            ),
            'use_numpy': self.settings.pop(
                'use_numpy', False
            ),
            'opentelemetry_traceparent': self.settings.pop(
                'opentelemetry_traceparent', None
            ),
            'opentelemetry_tracestate': self.settings.pop(
                'opentelemetry_tracestate', ''
            ),
            'quota_key': self.settings.pop(
                'quota_key', ''
            ),
            'input_format_null_as_default': self.settings.pop(
                'input_format_null_as_default', False
            ),
            'namedtuple_as_json': self.settings.pop(
                'namedtuple_as_json', True
            ),
            'server_side_params': self.settings.pop(
                'server_side_params', False
            )
        }

        if self.client_settings['use_numpy']:
            try:
                from .numpy.result import (
                    NumpyIterQueryResult, NumpyProgressQueryResult,
                    NumpyQueryResult
                )
                self.query_result_cls = NumpyQueryResult
                self.iter_query_result_cls = NumpyIterQueryResult
                self.progress_query_result_cls = NumpyProgressQueryResult
            except ImportError:
                raise RuntimeError('Extras for NumPy must be installed')
        else:
            self.query_result_cls = QueryResult
            self.iter_query_result_cls = IterQueryResult
            self.progress_query_result_cls = ProgressQueryResult

        round_robin = kwargs.pop('round_robin', False)
        # The primary connection still receives alt_hosts in its kwargs.
        self.connections = deque([Connection(*args, **kwargs)])

        if round_robin and 'alt_hosts' in kwargs:
            # One extra pooled connection per alternate host.
            alt_hosts = kwargs.pop('alt_hosts')
            for host in alt_hosts.split(','):
                url = urlparse('clickhouse://' + host)

                connection_kwargs = kwargs.copy()
                num_args = len(args)
                if num_args >= 2:
                    # host and port as positional arguments
                    connection_args = (url.hostname, url.port) + args[2:]
                elif num_args >= 1:
                    # host as positional and port as keyword argument
                    connection_args = (url.hostname, ) + args[1:]
                    connection_kwargs['port'] = url.port
                else:
                    # host and port as keyword arguments
                    connection_args = tuple()
                    connection_kwargs['host'] = url.hostname
                    connection_kwargs['port'] = url.port

                connection = Connection(*connection_args, **connection_kwargs)
                self.connections.append(connection)

        self.connection = self.get_connection()
        self.reset_last_query()
        super(Client, self).__init__()
|
|
166
|
+
|
|
167
|
+
def __enter__(self):
|
|
168
|
+
return self
|
|
169
|
+
|
|
170
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
171
|
+
self.disconnect()
|
|
172
|
+
|
|
173
|
+
    def get_connection(self):
        """
        Rotates the connection pool: returns the currently held connection
        (if any) to the tail of the deque and takes the next one from the
        head, refreshing its context with this client's settings.

        :return: the next pooled Connection.
        """
        # During __init__ ``self.connection`` does not exist yet, hence the
        # hasattr guard instead of a plain attribute access.
        if hasattr(self, 'connection'):
            self.connections.append(self.connection)

        connection = self.connections.popleft()

        connection.context.settings = self.settings
        connection.context.client_settings = self.client_settings
        return connection
|
|
182
|
+
|
|
183
|
+
def disconnect(self):
|
|
184
|
+
self.disconnect_connection()
|
|
185
|
+
for connection in self.connections:
|
|
186
|
+
connection.disconnect()
|
|
187
|
+
|
|
188
|
+
def disconnect_connection(self):
|
|
189
|
+
"""
|
|
190
|
+
Disconnects from the server.
|
|
191
|
+
"""
|
|
192
|
+
self.connection.disconnect()
|
|
193
|
+
self.reset_last_query()
|
|
194
|
+
|
|
195
|
+
def reset_last_query(self):
|
|
196
|
+
self.last_query = None
|
|
197
|
+
|
|
198
|
+
def receive_result(self, with_column_types=False, progress=False,
|
|
199
|
+
columnar=False):
|
|
200
|
+
|
|
201
|
+
gen = self.packet_generator()
|
|
202
|
+
|
|
203
|
+
if progress:
|
|
204
|
+
return self.progress_query_result_cls(
|
|
205
|
+
gen, with_column_types=with_column_types, columnar=columnar
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
else:
|
|
209
|
+
result = self.query_result_cls(
|
|
210
|
+
gen, with_column_types=with_column_types, columnar=columnar
|
|
211
|
+
)
|
|
212
|
+
return result.get_result()
|
|
213
|
+
|
|
214
|
+
def iter_receive_result(self, with_column_types=False):
|
|
215
|
+
gen = self.packet_generator()
|
|
216
|
+
|
|
217
|
+
result = self.iter_query_result_cls(
|
|
218
|
+
gen, with_column_types=with_column_types
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
for rows in result:
|
|
222
|
+
for row in rows:
|
|
223
|
+
yield row
|
|
224
|
+
|
|
225
|
+
    def packet_generator(self):
        """
        Yields data-bearing packets until end of stream.

        On any error (including KeyboardInterrupt raised at the yield point)
        all connections are dropped so a half-read stream is never reused.
        """
        while True:
            try:
                # receive_packet() returns False at end of stream, True for
                # packets it handled internally, or the packet itself.
                packet = self.receive_packet()
                if not packet:
                    break

                if packet is True:
                    continue

                yield packet

            except (Exception, KeyboardInterrupt):
                self.disconnect()
                raise
|
|
240
|
+
|
|
241
|
+
def receive_packet(self):
|
|
242
|
+
packet = self.connection.receive_packet()
|
|
243
|
+
|
|
244
|
+
if packet.type == ServerPacketTypes.EXCEPTION:
|
|
245
|
+
raise packet.exception
|
|
246
|
+
|
|
247
|
+
elif packet.type == ServerPacketTypes.PROGRESS:
|
|
248
|
+
self.last_query.store_progress(packet.progress)
|
|
249
|
+
return packet
|
|
250
|
+
|
|
251
|
+
elif packet.type == ServerPacketTypes.END_OF_STREAM:
|
|
252
|
+
return False
|
|
253
|
+
|
|
254
|
+
elif packet.type == ServerPacketTypes.DATA:
|
|
255
|
+
return packet
|
|
256
|
+
|
|
257
|
+
elif packet.type == ServerPacketTypes.TOTALS:
|
|
258
|
+
return packet
|
|
259
|
+
|
|
260
|
+
elif packet.type == ServerPacketTypes.EXTREMES:
|
|
261
|
+
return packet
|
|
262
|
+
|
|
263
|
+
elif packet.type == ServerPacketTypes.PROFILE_INFO:
|
|
264
|
+
self.last_query.store_profile(packet.profile_info)
|
|
265
|
+
return True
|
|
266
|
+
|
|
267
|
+
else:
|
|
268
|
+
return True
|
|
269
|
+
|
|
270
|
+
    def make_query_settings(self, settings):
        """
        Splits per-query ``settings`` into client-side and server-side parts
        and installs both on the current connection's context.

        :param settings: dict of per-query settings, may be ``None``.
        """
        # Copy so the caller's dict is not mutated by the pops below.
        settings = dict(settings or {})

        # Pick client-related settings.
        client_settings = self.client_settings.copy()
        for key in self.available_client_settings:
            if key in settings:
                client_settings[key] = settings.pop(key)

        self.connection.context.client_settings = client_settings

        # The rest of settings are ClickHouse-related.
        query_settings = self.settings.copy()
        query_settings.update(settings)
        self.connection.context.settings = query_settings
|
|
285
|
+
|
|
286
|
+
def track_current_database(self, query):
|
|
287
|
+
query = query.strip('; ')
|
|
288
|
+
if query.lower().startswith('use '):
|
|
289
|
+
self.connection.database = query[4:].strip()
|
|
290
|
+
|
|
291
|
+
    def establish_connection(self, settings):
        """
        Takes a connection from the pool and force-connects it, retrying over
        every pooled connection on network errors.

        :param settings: per-query settings forwarded to
                         :meth:`make_query_settings`.
        :raises errors.SocketTimeoutError, errors.NetworkError: re-raised
                from the last candidate when every connection fails.
        """
        # The currently held connection (if any) is recycled through
        # get_connection(), so it counts as a candidate too.
        num_connections = len(self.connections)
        if hasattr(self, 'connection'):
            num_connections += 1

        for i in range(num_connections):
            try:
                self.connection = self.get_connection()
                self.make_query_settings(settings)
                self.connection.force_connect()
                self.last_query = QueryInfo()

            except (errors.SocketTimeoutError, errors.NetworkError):
                # Try the next pooled connection; re-raise on the last one.
                if i < num_connections - 1:
                    continue
                raise

            return
|
|
309
|
+
|
|
310
|
+
    @contextmanager
    def disconnect_on_error(self, query, settings):
        """
        Wraps a single query execution: establishes a connection before the
        body runs and drops every connection if connection setup or the body
        raises, so a broken connection is never reused.
        """
        try:
            self.establish_connection(settings)
            # NOTE(review): reset per query; presumably re-populated from
            # server responses during execution -- confirm.
            self.connection.server_info.session_timezone = None

            yield

            # Only reached on success; records ``USE <db>`` switches.
            self.track_current_database(query)

        except (Exception, KeyboardInterrupt):
            self.disconnect()
            raise
|
|
323
|
+
|
|
324
|
+
    def execute(self, query, params=None, with_column_types=False,
                external_tables=None, query_id=None, settings=None,
                types_check=False, columnar=False):
        """
        Executes query.

        Establishes new connection if it wasn't established yet.
        After query execution connection remains intact for next queries.
        If connection can't be reused it will be closed and new connection will
        be created.

        :param query: query that will be send to server.
        :param params: substitution parameters for SELECT queries and data for
                       INSERT queries. Data for INSERT can be `list`, `tuple`
                       or :data:`~types.GeneratorType`.
                       Defaults to ``None`` (no parameters or data).
        :param with_column_types: if specified column names and types will be
                                  returned alongside with result.
                                  Defaults to ``False``.
        :param external_tables: external tables to send.
                                Defaults to ``None`` (no external tables).
        :param query_id: the query identifier. If no query id specified
                         ClickHouse server will generate it.
        :param settings: dictionary of query settings.
                         Defaults to ``None`` (no additional settings).
        :param types_check: enables type checking of data for INSERT queries.
                            Causes additional overhead. Defaults to ``False``.
        :param columnar: if specified the result of the SELECT query will be
                         returned in column-oriented form.
                         It also allows to INSERT data in columnar form.
                         Defaults to ``False`` (row-like form).

        :return: * number of inserted rows for INSERT queries with data.
                   Returning rows count from INSERT FROM SELECT is not
                   supported.
                 * if `with_column_types=False`: `list` of `tuples` with
                   rows/columns.
                 * if `with_column_types=True`: `tuple` of 2 elements:
                   * The first element is `list` of `tuples` with
                     rows/columns.
                   * The second element information is about columns: names
                     and types.
        """

        start_time = time()

        # ``settings`` is consumed inside disconnect_on_error via
        # make_query_settings; it is not passed to the process_* helpers.
        with self.disconnect_on_error(query, settings):
            # INSERT queries can use list/tuple/generator of list/tuples/dicts.
            # For SELECT parameters can be passed in only in dict right now.
            is_insert = isinstance(params, (list, tuple, types.GeneratorType))

            if is_insert:
                rv = self.process_insert_query(
                    query, params, external_tables=external_tables,
                    query_id=query_id, types_check=types_check,
                    columnar=columnar
                )
            else:
                rv = self.process_ordinary_query(
                    query, params=params, with_column_types=with_column_types,
                    external_tables=external_tables,
                    query_id=query_id, types_check=types_check,
                    columnar=columnar
                )
            self.last_query.store_elapsed(time() - start_time)
            return rv
|
|
390
|
+
|
|
391
|
+
    def execute_with_progress(
            self, query, params=None, with_column_types=False,
            external_tables=None, query_id=None, settings=None,
            types_check=False, columnar=False):
        """
        Executes SELECT query with progress information.
        See, :ref:`execute-with-progress`.

        :param query: query that will be send to server.
        :param params: substitution parameters for SELECT queries and data for
                       INSERT queries. Data for INSERT can be `list`, `tuple`
                       or :data:`~types.GeneratorType`.
                       Defaults to ``None`` (no parameters or data).
        :param with_column_types: if specified column names and types will be
                                  returned alongside with result.
                                  Defaults to ``False``.
        :param external_tables: external tables to send.
                                Defaults to ``None`` (no external tables).
        :param query_id: the query identifier. If no query id specified
                         ClickHouse server will generate it.
        :param settings: dictionary of query settings.
                         Defaults to ``None`` (no additional settings).
        :param types_check: enables type checking of data for INSERT queries.
                            Causes additional overhead. Defaults to ``False``.
        :param columnar: if specified the result will be returned in
                         column-oriented form.
                         Defaults to ``False`` (row-like form).
        :return: :ref:`progress-query-result` proxy.
        """

        with self.disconnect_on_error(query, settings):
            return self.process_ordinary_query_with_progress(
                query, params=params, with_column_types=with_column_types,
                external_tables=external_tables, query_id=query_id,
                types_check=types_check, columnar=columnar
            )
|
|
427
|
+
|
|
428
|
+
    def execute_iter(
            self, query, params=None, with_column_types=False,
            external_tables=None, query_id=None, settings=None,
            types_check=False, chunk_size=1):
        """
        *New in version 0.0.14.*

        Executes SELECT query with results streaming. See, :ref:`execute-iter`.

        :param query: query that will be send to server.
        :param params: substitution parameters for SELECT queries and data for
                       INSERT queries. Data for INSERT can be `list`, `tuple`
                       or :data:`~types.GeneratorType`.
                       Defaults to ``None`` (no parameters or data).
        :param with_column_types: if specified column names and types will be
                                  returned alongside with result.
                                  Defaults to ``False``.
        :param external_tables: external tables to send.
                                Defaults to ``None`` (no external tables).
        :param query_id: the query identifier. If no query id specified
                         ClickHouse server will generate it.
        :param settings: dictionary of query settings.
                         Defaults to ``None`` (no additional settings).
        :param types_check: enables type checking of data for INSERT queries.
                            Causes additional overhead. Defaults to ``False``.
        :param chunk_size: chunk query results.
        :return: :ref:`iter-query-result` proxy.
        """
        with self.disconnect_on_error(query, settings):
            rv = self.iter_process_ordinary_query(
                query, params=params, with_column_types=with_column_types,
                external_tables=external_tables,
                query_id=query_id, types_check=types_check
            )
            # chunk_size == 1 keeps plain row-by-row streaming.
            return chunks(rv, chunk_size) if chunk_size > 1 else rv
|
|
463
|
+
|
|
464
|
+
def query_dataframe(
|
|
465
|
+
self, query, params=None, external_tables=None, query_id=None,
|
|
466
|
+
settings=None, replace_nonwords=True):
|
|
467
|
+
"""
|
|
468
|
+
*New in version 0.2.0.*
|
|
469
|
+
|
|
470
|
+
Queries DataFrame with specified SELECT query.
|
|
471
|
+
|
|
472
|
+
:param query: query that will be send to server.
|
|
473
|
+
:param params: substitution parameters.
|
|
474
|
+
Defaults to ``None`` (no parameters or data).
|
|
475
|
+
:param external_tables: external tables to send.
|
|
476
|
+
Defaults to ``None`` (no external tables).
|
|
477
|
+
:param query_id: the query identifier. If no query id specified
|
|
478
|
+
ClickHouse server will generate it.
|
|
479
|
+
:param settings: dictionary of query settings.
|
|
480
|
+
Defaults to ``None`` (no additional settings).
|
|
481
|
+
:param replace_nonwords: boolean to replace non-words in column names
|
|
482
|
+
to underscores. Defaults to ``True``.
|
|
483
|
+
:return: pandas DataFrame.
|
|
484
|
+
"""
|
|
485
|
+
|
|
486
|
+
try:
|
|
487
|
+
import pandas as pd
|
|
488
|
+
except ImportError:
|
|
489
|
+
raise RuntimeError('Extras for NumPy must be installed')
|
|
490
|
+
|
|
491
|
+
data, columns = self.execute(
|
|
492
|
+
query, columnar=True, with_column_types=True, params=params,
|
|
493
|
+
external_tables=external_tables, query_id=query_id,
|
|
494
|
+
settings=settings
|
|
495
|
+
)
|
|
496
|
+
|
|
497
|
+
columns = [name for name, type_ in columns]
|
|
498
|
+
if replace_nonwords:
|
|
499
|
+
columns = [re.sub(r'\W', '_', x) for x in columns]
|
|
500
|
+
|
|
501
|
+
return pd.DataFrame(
|
|
502
|
+
{col: d for d, col in zip(data, columns)}, columns=columns
|
|
503
|
+
)
|
|
504
|
+
|
|
505
|
+
    def insert_dataframe(
            self, query, dataframe, external_tables=None, query_id=None,
            settings=None):
        """
        *New in version 0.2.0.*

        Inserts pandas DataFrame with specified query.

        :param query: query that will be send to server.
        :param dataframe: pandas DataFrame.
        :param external_tables: external tables to send.
                                Defaults to ``None`` (no external tables).
        :param query_id: the query identifier. If no query id specified
                         ClickHouse server will generate it.
        :param settings: dictionary of query settings.
                         Defaults to ``None`` (no additional settings).
        :return: number of inserted rows.
        :raises ValueError: if the DataFrame is missing columns required by
                            the target table.
        """

        try:
            import pandas as pd  # noqa: F401
        except ImportError:
            raise RuntimeError('Extras for NumPy must be installed')

        start_time = time()

        with self.disconnect_on_error(query, settings):
            self.connection.send_query(query, query_id=query_id)
            self.connection.send_external_tables(external_tables)

            # The server replies with a sample block describing the columns
            # the INSERT target expects.
            sample_block = self.receive_sample_block()
            rv = None
            if sample_block:
                columns = [x[0] for x in sample_block.columns_with_types]
                # raise if any columns are missing from the dataframe
                diff = set(columns) - set(dataframe.columns)
                if len(diff):
                    msg = "DataFrame missing required columns: {}"
                    raise ValueError(msg.format(list(diff)))

                # Reorder DataFrame columns to match the server's order.
                data = [dataframe[column].values for column in columns]
                rv = self.send_data(sample_block, data, columnar=True)
                self.receive_end_of_query()

            self.last_query.store_elapsed(time() - start_time)
            return rv
|
|
551
|
+
|
|
552
|
+
def process_ordinary_query_with_progress(
|
|
553
|
+
self, query, params=None, with_column_types=False,
|
|
554
|
+
external_tables=None, query_id=None,
|
|
555
|
+
types_check=False, columnar=False):
|
|
556
|
+
|
|
557
|
+
if params is not None:
|
|
558
|
+
query = self.substitute_params(
|
|
559
|
+
query, params, self.connection.context
|
|
560
|
+
)
|
|
561
|
+
|
|
562
|
+
self.connection.send_query(query, query_id=query_id, params=params)
|
|
563
|
+
self.connection.send_external_tables(external_tables,
|
|
564
|
+
types_check=types_check)
|
|
565
|
+
return self.receive_result(with_column_types=with_column_types,
|
|
566
|
+
progress=True, columnar=columnar)
|
|
567
|
+
|
|
568
|
+
def process_ordinary_query(
|
|
569
|
+
self, query, params=None, with_column_types=False,
|
|
570
|
+
external_tables=None, query_id=None,
|
|
571
|
+
types_check=False, columnar=False):
|
|
572
|
+
|
|
573
|
+
if params is not None:
|
|
574
|
+
query = self.substitute_params(
|
|
575
|
+
query, params, self.connection.context
|
|
576
|
+
)
|
|
577
|
+
self.connection.send_query(query, query_id=query_id, params=params)
|
|
578
|
+
self.connection.send_external_tables(external_tables,
|
|
579
|
+
types_check=types_check)
|
|
580
|
+
return self.receive_result(with_column_types=with_column_types,
|
|
581
|
+
columnar=columnar)
|
|
582
|
+
|
|
583
|
+
def iter_process_ordinary_query(
|
|
584
|
+
self, query, params=None, with_column_types=False,
|
|
585
|
+
external_tables=None, query_id=None,
|
|
586
|
+
types_check=False):
|
|
587
|
+
|
|
588
|
+
if params is not None:
|
|
589
|
+
query = self.substitute_params(
|
|
590
|
+
query, params, self.connection.context
|
|
591
|
+
)
|
|
592
|
+
|
|
593
|
+
self.connection.send_query(query, query_id=query_id, params=params)
|
|
594
|
+
self.connection.send_external_tables(external_tables,
|
|
595
|
+
types_check=types_check)
|
|
596
|
+
return self.iter_receive_result(with_column_types=with_column_types)
|
|
597
|
+
|
|
598
|
+
def process_insert_query(self, query_without_data, data,
|
|
599
|
+
external_tables=None, query_id=None,
|
|
600
|
+
types_check=False, columnar=False):
|
|
601
|
+
self.connection.send_query(query_without_data, query_id=query_id)
|
|
602
|
+
self.connection.send_external_tables(external_tables,
|
|
603
|
+
types_check=types_check)
|
|
604
|
+
sample_block = self.receive_sample_block()
|
|
605
|
+
|
|
606
|
+
if sample_block:
|
|
607
|
+
rv = self.send_data(sample_block, data,
|
|
608
|
+
types_check=types_check, columnar=columnar)
|
|
609
|
+
self.receive_end_of_insert_query()
|
|
610
|
+
return rv
|
|
611
|
+
|
|
612
|
+
def receive_sample_block(self):
|
|
613
|
+
while True:
|
|
614
|
+
packet = self.connection.receive_packet()
|
|
615
|
+
|
|
616
|
+
if packet.type == ServerPacketTypes.DATA:
|
|
617
|
+
return packet.block
|
|
618
|
+
|
|
619
|
+
elif packet.type == ServerPacketTypes.EXCEPTION:
|
|
620
|
+
raise packet.exception
|
|
621
|
+
|
|
622
|
+
elif packet.type == ServerPacketTypes.LOG:
|
|
623
|
+
log_block(packet.block)
|
|
624
|
+
|
|
625
|
+
elif packet.type == ServerPacketTypes.TABLE_COLUMNS:
|
|
626
|
+
pass
|
|
627
|
+
|
|
628
|
+
else:
|
|
629
|
+
message = self.connection.unexpected_packet_message(
|
|
630
|
+
'Data, Exception, Log or TableColumns', packet.type
|
|
631
|
+
)
|
|
632
|
+
raise errors.UnexpectedPacketFromServerError(message)
|
|
633
|
+
|
|
634
|
+
    def send_data(self, sample_block, data, types_check=False, columnar=False):
        """
        Splits ``data`` into blocks of ``insert_block_size`` and streams them
        to the server, finishing with an empty terminator block.

        :param sample_block: block describing target columns and types.
        :param data: rows (or columns when ``columnar=True``) to insert.
        :param types_check: enables per-value type checking (extra overhead).
        :param columnar: treat ``data`` as a list of columns.
        :return: total number of inserted rows.
        """
        inserted_rows = 0

        client_settings = self.connection.context.client_settings
        block_cls = ColumnOrientedBlock if columnar else RowOrientedBlock

        if client_settings['use_numpy']:
            try:
                from .numpy.helpers import column_chunks as numpy_column_chunks

                if columnar:
                    slicer = numpy_column_chunks
                else:
                    # NumPy path has no row-oriented chunker.
                    raise ValueError(
                        'NumPy inserts is only allowed with columnar=True'
                    )

            except ImportError:
                raise RuntimeError('Extras for NumPy must be installed')

        else:
            slicer = column_chunks if columnar else chunks

        for chunk in slicer(data, client_settings['insert_block_size']):
            block = block_cls(sample_block.columns_with_types, chunk,
                              types_check=types_check)
            self.connection.send_data(block)
            inserted_rows += block.num_rows

            # Starting from the specific revision there are profile events
            # sent by server in response to each inserted block
            self.receive_profile_events()

        # Empty block means end of data.
        self.connection.send_data(block_cls())
        # If enabled by revision profile events are also sent after empty block
        self.receive_profile_events()

        return inserted_rows
|
|
673
|
+
|
|
674
|
+
def receive_end_of_query(self):
    """Drain the server's packets after a query until END_OF_STREAM.

    Progress and profile-events packets are folded into
    ``self.last_query``; Log blocks are printed and TableColumns
    packets ignored.  Exception packets are re-raised and any other
    packet type aborts with UnexpectedPacketFromServerError.
    """
    finished = False
    while not finished:
        packet = self.connection.receive_packet()
        ptype = packet.type

        if ptype == ServerPacketTypes.END_OF_STREAM:
            finished = True
        elif ptype == ServerPacketTypes.PROGRESS:
            self.last_query.store_progress(packet.progress)
        elif ptype == ServerPacketTypes.EXCEPTION:
            raise packet.exception
        elif ptype == ServerPacketTypes.LOG:
            log_block(packet.block)
        elif ptype == ServerPacketTypes.TABLE_COLUMNS:
            pass
        elif ptype == ServerPacketTypes.PROFILE_EVENTS:
            self.last_query.store_profile(packet.profile_info)
        else:
            message = self.connection.unexpected_packet_message(
                'Exception, EndOfStream, Progress, TableColumns, '
                'ProfileEvents or Log', ptype
            )
            raise errors.UnexpectedPacketFromServerError(message)
def receive_end_of_insert_query(self):
    """Consume the packets terminating an INSERT until END_OF_STREAM.

    Log blocks are printed, Progress packets are recorded on
    ``self.last_query``, Exception packets are re-raised; anything
    else raises UnexpectedPacketFromServerError.
    """
    while True:
        packet = self.connection.receive_packet()
        ptype = packet.type

        if ptype == ServerPacketTypes.END_OF_STREAM:
            return

        if ptype == ServerPacketTypes.EXCEPTION:
            raise packet.exception

        if ptype == ServerPacketTypes.LOG:
            log_block(packet.block)
        elif ptype == ServerPacketTypes.PROGRESS:
            self.last_query.store_progress(packet.progress)
        else:
            message = self.connection.unexpected_packet_message(
                'EndOfStream, Log, Progress or Exception', ptype
            )
            raise errors.UnexpectedPacketFromServerError(message)
def receive_profile_events(self):
    """Consume the profile-events packet the server sends per insert block.

    Servers older than
    DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS_IN_INSERT do not
    emit these packets, in which case this method is a no-op.
    """
    min_revision = \
        defines.DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS_IN_INSERT
    if self.connection.server_info.used_revision < min_revision:
        return None

    while True:
        packet = self.connection.receive_packet()
        ptype = packet.type

        if ptype == ServerPacketTypes.PROFILE_EVENTS:
            self.last_query.store_profile(packet.profile_info)
            return

        if ptype == ServerPacketTypes.PROGRESS:
            self.last_query.store_progress(packet.progress)
        elif ptype == ServerPacketTypes.LOG:
            log_block(packet.block)
        elif ptype == ServerPacketTypes.EXCEPTION:
            raise packet.exception
        elif ptype == ServerPacketTypes.TIMEZONE_UPDATE:
            pass
        else:
            message = self.connection.unexpected_packet_message(
                'ProfileEvents, Progress, Log, Exception or '
                'TimezoneUpdate', ptype
            )
            raise errors.UnexpectedPacketFromServerError(message)
def cancel(self, with_column_types=False):
    """Ask the server to cancel the currently running query.

    The protocol still requires reading packets up to END_OF_STREAM,
    so the (possibly partial) result is received and returned.
    """
    # TODO: Add warning if already cancelled.
    self.connection.send_cancel()
    # Client must still read until END_OF_STREAM packet.
    partial_result = self.receive_result(
        with_column_types=with_column_types
    )
    return partial_result
def substitute_params(self, query, params, context):
    """
    Substitute client-side parameters into *query* and return the result.

    When the ``server_side_params`` client setting is enabled the query
    is returned untouched — the server performs the templating itself.
    Otherwise *params* must be a dict; its values are escaped with
    ``escape_params`` and merged into the query via ``%``-formatting.

    For example::

        client = Client(...)

        substituted_query = client.substitute_params(
            query='SELECT 1234, %(foo)s',
            params={'foo': 'bar'},
            context=client.connection.context
        )

        # prints: SELECT 1234, 'bar'
        print(substituted_query)

    Raises ValueError when *params* is not a dict.
    """
    # In case of server side templating we don't substitute here.
    if self.connection.context.client_settings['server_side_params']:
        return query

    if not isinstance(params, dict):
        raise ValueError('Parameters are expected in dict form')

    return query % escape_params(params, context)
@classmethod
def from_url(cls, url):
    """
    Return a client configured from the given URL.

    For example::

        clickhouse://[user:password]@localhost:9000/default
        clickhouses://[user:password]@localhost:9440/default

    Two URL schemes are supported:

    * clickhouse:// creates a normal TCP socket connection
    * clickhouses:// creates a SSL wrapped TCP socket connection

    Any additional querystring arguments will be passed along to
    the Connection class's initializer.
    """
    host, connection_kwargs = parse_url(url)
    return cls(host, **connection_kwargs)
|