clickhouse-driver 0.2.9__cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. clickhouse_driver/__init__.py +9 -0
  2. clickhouse_driver/block.py +227 -0
  3. clickhouse_driver/blockstreamprofileinfo.py +22 -0
  4. clickhouse_driver/bufferedreader.cpython-39-powerpc64le-linux-gnu.so +0 -0
  5. clickhouse_driver/bufferedwriter.cpython-39-powerpc64le-linux-gnu.so +0 -0
  6. clickhouse_driver/client.py +812 -0
  7. clickhouse_driver/clientinfo.py +119 -0
  8. clickhouse_driver/columns/__init__.py +0 -0
  9. clickhouse_driver/columns/arraycolumn.py +161 -0
  10. clickhouse_driver/columns/base.py +221 -0
  11. clickhouse_driver/columns/boolcolumn.py +7 -0
  12. clickhouse_driver/columns/datecolumn.py +108 -0
  13. clickhouse_driver/columns/datetimecolumn.py +203 -0
  14. clickhouse_driver/columns/decimalcolumn.py +116 -0
  15. clickhouse_driver/columns/enumcolumn.py +129 -0
  16. clickhouse_driver/columns/exceptions.py +12 -0
  17. clickhouse_driver/columns/floatcolumn.py +34 -0
  18. clickhouse_driver/columns/intcolumn.py +157 -0
  19. clickhouse_driver/columns/intervalcolumn.py +33 -0
  20. clickhouse_driver/columns/ipcolumn.py +118 -0
  21. clickhouse_driver/columns/jsoncolumn.py +37 -0
  22. clickhouse_driver/columns/largeint.cpython-39-powerpc64le-linux-gnu.so +0 -0
  23. clickhouse_driver/columns/lowcardinalitycolumn.py +142 -0
  24. clickhouse_driver/columns/mapcolumn.py +73 -0
  25. clickhouse_driver/columns/nestedcolumn.py +10 -0
  26. clickhouse_driver/columns/nothingcolumn.py +13 -0
  27. clickhouse_driver/columns/nullablecolumn.py +7 -0
  28. clickhouse_driver/columns/nullcolumn.py +15 -0
  29. clickhouse_driver/columns/numpy/__init__.py +0 -0
  30. clickhouse_driver/columns/numpy/base.py +47 -0
  31. clickhouse_driver/columns/numpy/boolcolumn.py +8 -0
  32. clickhouse_driver/columns/numpy/datecolumn.py +19 -0
  33. clickhouse_driver/columns/numpy/datetimecolumn.py +146 -0
  34. clickhouse_driver/columns/numpy/floatcolumn.py +24 -0
  35. clickhouse_driver/columns/numpy/intcolumn.py +43 -0
  36. clickhouse_driver/columns/numpy/lowcardinalitycolumn.py +96 -0
  37. clickhouse_driver/columns/numpy/service.py +58 -0
  38. clickhouse_driver/columns/numpy/stringcolumn.py +78 -0
  39. clickhouse_driver/columns/numpy/tuplecolumn.py +37 -0
  40. clickhouse_driver/columns/service.py +185 -0
  41. clickhouse_driver/columns/simpleaggregatefunctioncolumn.py +7 -0
  42. clickhouse_driver/columns/stringcolumn.py +73 -0
  43. clickhouse_driver/columns/tuplecolumn.py +63 -0
  44. clickhouse_driver/columns/util.py +61 -0
  45. clickhouse_driver/columns/uuidcolumn.py +64 -0
  46. clickhouse_driver/compression/__init__.py +28 -0
  47. clickhouse_driver/compression/base.py +87 -0
  48. clickhouse_driver/compression/lz4.py +21 -0
  49. clickhouse_driver/compression/lz4hc.py +9 -0
  50. clickhouse_driver/compression/zstd.py +20 -0
  51. clickhouse_driver/connection.py +793 -0
  52. clickhouse_driver/context.py +36 -0
  53. clickhouse_driver/dbapi/__init__.py +62 -0
  54. clickhouse_driver/dbapi/connection.py +99 -0
  55. clickhouse_driver/dbapi/cursor.py +370 -0
  56. clickhouse_driver/dbapi/errors.py +40 -0
  57. clickhouse_driver/dbapi/extras.py +73 -0
  58. clickhouse_driver/defines.py +58 -0
  59. clickhouse_driver/errors.py +453 -0
  60. clickhouse_driver/log.py +48 -0
  61. clickhouse_driver/numpy/__init__.py +0 -0
  62. clickhouse_driver/numpy/block.py +8 -0
  63. clickhouse_driver/numpy/helpers.py +28 -0
  64. clickhouse_driver/numpy/result.py +123 -0
  65. clickhouse_driver/opentelemetry.py +43 -0
  66. clickhouse_driver/progress.py +44 -0
  67. clickhouse_driver/protocol.py +130 -0
  68. clickhouse_driver/queryprocessingstage.py +8 -0
  69. clickhouse_driver/reader.py +69 -0
  70. clickhouse_driver/readhelpers.py +26 -0
  71. clickhouse_driver/result.py +144 -0
  72. clickhouse_driver/settings/__init__.py +0 -0
  73. clickhouse_driver/settings/available.py +405 -0
  74. clickhouse_driver/settings/types.py +50 -0
  75. clickhouse_driver/settings/writer.py +34 -0
  76. clickhouse_driver/streams/__init__.py +0 -0
  77. clickhouse_driver/streams/compressed.py +88 -0
  78. clickhouse_driver/streams/native.py +108 -0
  79. clickhouse_driver/util/__init__.py +0 -0
  80. clickhouse_driver/util/compat.py +39 -0
  81. clickhouse_driver/util/escape.py +94 -0
  82. clickhouse_driver/util/helpers.py +171 -0
  83. clickhouse_driver/varint.cpython-39-powerpc64le-linux-gnu.so +0 -0
  84. clickhouse_driver/writer.py +67 -0
  85. clickhouse_driver-0.2.9.dist-info/LICENSE +21 -0
  86. clickhouse_driver-0.2.9.dist-info/METADATA +202 -0
  87. clickhouse_driver-0.2.9.dist-info/RECORD +89 -0
  88. clickhouse_driver-0.2.9.dist-info/WHEEL +6 -0
  89. clickhouse_driver-0.2.9.dist-info/top_level.txt +1 -0
@@ -0,0 +1,812 @@
1
+ import re
2
+ from collections import deque
3
+ from contextlib import contextmanager
4
+ from time import time
5
+ import types
6
+ from urllib.parse import urlparse
7
+
8
+ from . import errors, defines
9
+ from .block import ColumnOrientedBlock, RowOrientedBlock
10
+ from .connection import Connection
11
+ from .log import log_block
12
+ from .protocol import ServerPacketTypes
13
+ from .result import (
14
+ IterQueryResult, ProgressQueryResult, QueryResult, QueryInfo
15
+ )
16
+ from .util.escape import escape_params
17
+ from .util.helpers import column_chunks, chunks, parse_url
18
+
19
+
20
+ class Client(object):
21
+ """
22
+ Client for communication with the ClickHouse server.
23
+ Single connection is established per each connected instance of the client.
24
+
25
+ :param settings: Dictionary of settings that passed to every query (except
26
+ for the client settings, see below). Defaults to ``None``
27
+ (no additional settings). See all available settings in
28
+ `ClickHouse docs
29
+ <https://clickhouse.com/docs/en/operations/settings/settings/>`_.
30
+ :param \\**kwargs: All other args are passed to the
31
+ :py:class:`~clickhouse_driver.connection.Connection`
32
+ constructor.
33
+
34
+ The following keys when passed in ``settings`` are used for configuring the
35
+ client itself:
36
+
37
+ * ``insert_block_size`` -- chunk size to split rows for ``INSERT``.
38
+ Defaults to ``1048576``.
39
+ * ``strings_as_bytes`` -- turns off string column encoding/decoding.
40
+ * ``strings_encoding`` -- specifies string encoding. UTF-8 by default.
41
+ * ``use_numpy`` -- Use NumPy for columns reading. New in version
42
+ *0.2.0*.
43
+ * ``opentelemetry_traceparent`` -- OpenTelemetry traceparent header as
44
+ described by W3C Trace Context recommendation.
45
+ New in version *0.2.2*.
46
+ * ``opentelemetry_tracestate`` -- OpenTelemetry tracestate header as
47
+ described by W3C Trace Context recommendation.
48
+ New in version *0.2.2*.
49
+ * ``quota_key`` -- A string to differentiate quotas when the user has
50
+ keyed quotas configured on server.
51
+ New in version *0.2.3*.
52
+ * ``input_format_null_as_default`` -- Initialize null fields with
53
+ default values if data type of this field is not
54
+ nullable. Does not work for NumPy. Default: False.
55
+ New in version *0.2.4*.
56
+ * ``round_robin`` -- If ``alt_hosts`` are provided the query will be
57
+ executed on host picked with round-robin algorithm.
58
+ New in version *0.2.5*.
59
+ * ``namedtuple_as_json`` -- Controls named tuple and nested types
60
+ deserialization. To interpret these columns alongside
61
+ with ``allow_experimental_object_type=1`` as Python
62
+ tuple set ``namedtuple_as_json`` to ``False``.
63
+ Default: True.
64
+ New in version *0.2.6*.
65
+ * ``server_side_params`` -- Specifies on which side query parameters
66
+ should be rendered into placeholders.
67
+ Default: False. Means that parameters are rendered
68
+ on driver's side.
69
+ New in version *0.2.7*.
70
+ """
71
+
72
+ available_client_settings = (
73
+ 'insert_block_size', # TODO: rename to max_insert_block_size
74
+ 'strings_as_bytes',
75
+ 'strings_encoding',
76
+ 'use_numpy',
77
+ 'opentelemetry_traceparent',
78
+ 'opentelemetry_tracestate',
79
+ 'quota_key',
80
+ 'input_format_null_as_default',
81
+ 'namedtuple_as_json',
82
+ 'server_side_params'
83
+ )
84
+
85
def __init__(self, *args, **kwargs):
    """
    Builds the client settings and the pool of connections.

    :param settings: optional dictionary passed as keyword argument.
                     Client-related keys are moved into
                     ``client_settings``; the remaining keys are kept in
                     ``settings``.
    :param round_robin: keyword argument. When true and ``alt_hosts`` is
                        given, one extra connection is created per
                        alternative host.

    All other positional and keyword arguments are forwarded to the
    ``Connection`` constructor.
    """
    # Work on a copy so the caller's dictionary is never mutated.
    self.settings = (kwargs.pop('settings', None) or {}).copy()

    block_size = self.settings.pop(
        'insert_block_size', defines.DEFAULT_INSERT_BLOCK_SIZE
    )
    self.client_settings = {'insert_block_size': int(block_size)}

    # Remaining client-side options paired with their fallback values.
    client_defaults = (
        ('strings_as_bytes', False),
        ('strings_encoding', defines.STRINGS_ENCODING),
        ('use_numpy', False),
        ('opentelemetry_traceparent', None),
        ('opentelemetry_tracestate', ''),
        ('quota_key', ''),
        ('input_format_null_as_default', False),
        ('namedtuple_as_json', True),
        ('server_side_params', False),
    )
    for option, fallback in client_defaults:
        self.client_settings[option] = self.settings.pop(option, fallback)

    if not self.client_settings['use_numpy']:
        self.query_result_cls = QueryResult
        self.iter_query_result_cls = IterQueryResult
        self.progress_query_result_cls = ProgressQueryResult
    else:
        # NumPy result classes are imported lazily: they are optional.
        try:
            from .numpy.result import (
                NumpyIterQueryResult, NumpyProgressQueryResult,
                NumpyQueryResult
            )
        except ImportError:
            raise RuntimeError('Extras for NumPy must be installed')

        self.query_result_cls = NumpyQueryResult
        self.iter_query_result_cls = NumpyIterQueryResult
        self.progress_query_result_cls = NumpyProgressQueryResult

    round_robin = kwargs.pop('round_robin', False)
    # The primary connection still receives ``alt_hosts`` (if any).
    self.connections = deque([Connection(*args, **kwargs)])

    if round_robin and 'alt_hosts' in kwargs:
        positional = len(args)
        for netloc in kwargs.pop('alt_hosts').split(','):
            parsed = urlparse('clickhouse://' + netloc)
            extra = kwargs.copy()

            if positional >= 2:
                # host and port as positional arguments
                conn_args = (parsed.hostname, parsed.port) + args[2:]
            elif positional == 1:
                # host as positional and port as keyword argument
                conn_args = (parsed.hostname, )
                extra['port'] = parsed.port
            else:
                # host and port as keyword arguments
                conn_args = ()
                extra.update(host=parsed.hostname, port=parsed.port)

            self.connections.append(Connection(*conn_args, **extra))

    self.connection = self.get_connection()
    self.reset_last_query()
    super(Client, self).__init__()
166
+
167
def __enter__(self):
    """Context-manager entry: hands back the client itself."""
    return self
169
+
170
def __exit__(self, exc_type, exc_val, exc_tb):
    """Context-manager exit: disconnects regardless of the outcome."""
    self.disconnect()
172
+
173
def get_connection(self):
    """
    Rotates the connection pool and returns the next connection.

    The currently active connection (if one is set) is appended to the
    right end of the pool, then the leftmost connection is taken and its
    context is pointed at the client's settings.
    """
    try:
        active = self.connection
    except AttributeError:
        # No active connection yet, nothing to hand back to the pool.
        pass
    else:
        self.connections.append(active)

    picked = self.connections.popleft()
    context = picked.context
    context.settings = self.settings
    context.client_settings = self.client_settings
    return picked
182
+
183
def disconnect(self):
    """
    Disconnects the active connection and every pooled connection.
    """
    self.disconnect_connection()
    for pooled in self.connections:
        pooled.disconnect()
187
+
188
def disconnect_connection(self):
    """
    Disconnects the active connection from the server and forgets the
    information about the last query.
    """
    active = self.connection
    active.disconnect()
    self.reset_last_query()
194
+
195
def reset_last_query(self):
    """Clears the stored information about the last query."""
    self.last_query = None
197
+
198
def receive_result(self, with_column_types=False, progress=False,
                   columnar=False):
    """
    Wraps the packet stream into a query result.

    :param with_column_types: forwarded to the result class.
    :param progress: if true the progress result object itself is
                     returned; otherwise the regular result is consumed
                     and its ``get_result()`` value is returned.
    :param columnar: forwarded to the result class.
    """
    packets = self.packet_generator()
    options = {
        'with_column_types': with_column_types,
        'columnar': columnar,
    }

    if not progress:
        return self.query_result_cls(packets, **options).get_result()

    return self.progress_query_result_cls(packets, **options)
213
+
214
def iter_receive_result(self, with_column_types=False):
    """
    Generator yielding result rows one by one.

    The iterating result class produces batches of rows; every batch is
    flattened so that the caller receives single rows.
    """
    batches = self.iter_query_result_cls(
        self.packet_generator(), with_column_types=with_column_types
    )

    for batch in batches:
        yield from batch
224
+
225
def packet_generator(self):
    """
    Generator over packets returned by ``receive_packet``.

    A falsy packet ends the stream, ``True`` marks a packet that is
    skipped, anything else is yielded. On ``Exception`` or
    ``KeyboardInterrupt`` the client disconnects and the error is
    re-raised.
    """
    try:
        while True:
            packet = self.receive_packet()

            if not packet:
                return

            if packet is not True:
                yield packet

    except (Exception, KeyboardInterrupt):
        self.disconnect()
        raise
240
+
241
def receive_packet(self):
    """
    Reads one packet from the connection and classifies it.

    :return: the packet for Progress, Data, Totals and Extremes packets;
             ``False`` for EndOfStream; ``True`` for ProfileInfo and any
             other packet type.
    :raises: the exception carried by an Exception packet.
    """
    packet = self.connection.receive_packet()
    kind = packet.type

    if kind == ServerPacketTypes.EXCEPTION:
        raise packet.exception

    if kind == ServerPacketTypes.END_OF_STREAM:
        return False

    if kind == ServerPacketTypes.PROGRESS:
        self.last_query.store_progress(packet.progress)
        return packet

    payload_kinds = (
        ServerPacketTypes.DATA,
        ServerPacketTypes.TOTALS,
        ServerPacketTypes.EXTREMES,
    )
    if kind in payload_kinds:
        return packet

    if kind == ServerPacketTypes.PROFILE_INFO:
        self.last_query.store_profile(packet.profile_info)

    # ProfileInfo and all remaining packet types are reported as True.
    return True
269
+
270
def make_query_settings(self, settings):
    """
    Splits per-query ``settings`` into client and ClickHouse settings
    and stores both on the active connection's context.

    Neither ``settings`` nor the client's own dictionaries are mutated.
    """
    overrides = dict(settings or {})
    context = self.connection.context

    # Keys known as client settings override the client-level values.
    per_query_client = self.client_settings.copy()
    per_query_client.update(
        (name, overrides.pop(name))
        for name in self.available_client_settings
        if name in overrides
    )
    context.client_settings = per_query_client

    # Whatever is left is ClickHouse-related.
    merged = self.settings.copy()
    merged.update(overrides)
    context.settings = merged
285
+
286
def track_current_database(self, query):
    """
    Remembers the database selected by a ``USE <database>`` query.

    Surrounding whitespace (including newlines and tabs) and semicolons
    are ignored, so ``"USE db;\\n"`` records ``db`` rather than ``db;``.
    Queries that are not ``USE`` statements leave the connection
    untouched.

    :param query: the query text that has just been executed.
    """
    # Strip every kind of whitespace, not only spaces: a trailing newline
    # after the semicolon would otherwise keep the ';' in the name.
    statement = query.strip('; \t\r\n\f\v')

    parts = statement.split(None, 1)
    if len(parts) == 2 and parts[0].lower() == 'use':
        self.connection.database = parts[1].strip()
290
+
291
def establish_connection(self, settings):
    """
    Connects using the next connection from the pool.

    Each attempt rotates the pool, applies the query settings, forces
    the connection and creates a fresh ``QueryInfo``. Socket timeout and
    network errors move on to the next connection; the error from the
    last attempt is re-raised.
    """
    attempts = len(self.connections)
    if hasattr(self, 'connection'):
        # The active connection takes part in the rotation as well.
        attempts += 1

    final_attempt = attempts - 1

    for attempt in range(attempts):
        try:
            self.connection = self.get_connection()
            self.make_query_settings(settings)
            self.connection.force_connect()
            self.last_query = QueryInfo()

        except (errors.SocketTimeoutError, errors.NetworkError):
            if attempt >= final_attempt:
                raise

        else:
            return
309
+
310
@contextmanager
def disconnect_on_error(self, query, settings):
    """
    Context manager wrapping a single query execution.

    Before the body runs, a connection is established and the server's
    session timezone is reset. After the body finishes without error the
    current database is tracked from ``query``. Any ``Exception`` or
    ``KeyboardInterrupt`` disconnects the client and is re-raised.
    """
    try:
        self.establish_connection(settings)
        server_info = self.connection.server_info
        server_info.session_timezone = None

        yield

        self.track_current_database(query)

    except (Exception, KeyboardInterrupt):
        self.disconnect()
        raise
323
+
324
def execute(self, query, params=None, with_column_types=False,
            external_tables=None, query_id=None, settings=None,
            types_check=False, columnar=False):
    """
    Executes query.

    A connection is established first if needed. When ``params`` is a
    `list`, `tuple` or generator the query is treated as an INSERT with
    data; otherwise it is processed as an ordinary query.

    :param query: query that will be sent to server.
    :param params: substitution parameters for SELECT queries and data
                   for INSERT queries. Data for INSERT can be `list`,
                   `tuple` or :data:`~types.GeneratorType`.
                   Defaults to ``None`` (no parameters or data).
    :param with_column_types: if specified column names and types will
                              be returned alongside with result.
                              Defaults to ``False``.
    :param external_tables: external tables to send.
                            Defaults to ``None`` (no external tables).
    :param query_id: the query identifier. If no query id specified
                     ClickHouse server will generate it.
    :param settings: dictionary of query settings.
                     Defaults to ``None`` (no additional settings).
    :param types_check: enables type checking of data for INSERT
                        queries. Causes additional overhead.
                        Defaults to ``False``.
    :param columnar: if specified the result of the SELECT query will be
                     returned in column-oriented form. It also allows to
                     INSERT data in columnar form.
                     Defaults to ``False`` (row-like form).

    :return: * number of inserted rows for INSERT queries with data.
               Returning rows count from INSERT FROM SELECT is not
               supported.
             * if `with_column_types=False`: `list` of `tuples` with
               rows/columns.
             * if `with_column_types=True`: `tuple` of 2 elements:
                * The first element is `list` of `tuples` with
                  rows/columns.
                * The second element is information about columns:
                  names and types.
    """

    started_at = time()

    # Arguments shared by both processing paths.
    shared = {
        'external_tables': external_tables,
        'query_id': query_id,
        'types_check': types_check,
        'columnar': columnar,
    }

    with self.disconnect_on_error(query, settings):
        # INSERT queries can use list/tuple/generator of list/tuples/dicts.
        # For SELECT parameters can be passed in only in dict right now.
        if isinstance(params, (list, tuple, types.GeneratorType)):
            rv = self.process_insert_query(query, params, **shared)
        else:
            rv = self.process_ordinary_query(
                query, params=params,
                with_column_types=with_column_types, **shared
            )

        self.last_query.store_elapsed(time() - started_at)
        return rv
390
+
391
def execute_with_progress(
        self, query, params=None, with_column_types=False,
        external_tables=None, query_id=None, settings=None,
        types_check=False, columnar=False):
    """
    Executes SELECT query with progress information.
    See, :ref:`execute-with-progress`.

    :param query: query that will be sent to server.
    :param params: substitution parameters for the query.
                   Defaults to ``None`` (no parameters).
    :param with_column_types: if specified column names and types will
                              be returned alongside with result.
                              Defaults to ``False``.
    :param external_tables: external tables to send.
                            Defaults to ``None`` (no external tables).
    :param query_id: the query identifier. If no query id specified
                     ClickHouse server will generate it.
    :param settings: dictionary of query settings.
                     Defaults to ``None`` (no additional settings).
    :param types_check: forwarded when sending external tables.
                        Defaults to ``False``.
    :param columnar: if specified the result will be returned in
                     column-oriented form.
                     Defaults to ``False`` (row-like form).
    :return: :ref:`progress-query-result` proxy.
    """

    forwarded = {
        'params': params,
        'with_column_types': with_column_types,
        'external_tables': external_tables,
        'query_id': query_id,
        'types_check': types_check,
        'columnar': columnar,
    }

    with self.disconnect_on_error(query, settings):
        return self.process_ordinary_query_with_progress(query, **forwarded)
427
+
428
def execute_iter(
        self, query, params=None, with_column_types=False,
        external_tables=None, query_id=None, settings=None,
        types_check=False, chunk_size=1):
    """
    *New in version 0.0.14.*

    Executes SELECT query with results streaming. See, :ref:`execute-iter`.

    :param query: query that will be sent to server.
    :param params: substitution parameters for the query.
                   Defaults to ``None`` (no parameters).
    :param with_column_types: if specified column names and types will
                              be returned alongside with result.
                              Defaults to ``False``.
    :param external_tables: external tables to send.
                            Defaults to ``None`` (no external tables).
    :param query_id: the query identifier. If no query id specified
                     ClickHouse server will generate it.
    :param settings: dictionary of query settings.
                     Defaults to ``None`` (no additional settings).
    :param types_check: forwarded when sending external tables.
                        Defaults to ``False``.
    :param chunk_size: when greater than 1 the rows are grouped with
                       ``chunks`` using this size; otherwise rows are
                       returned one by one.
    :return: :ref:`iter-query-result` proxy.
    """
    with self.disconnect_on_error(query, settings):
        rows = self.iter_process_ordinary_query(
            query, params=params, with_column_types=with_column_types,
            external_tables=external_tables,
            query_id=query_id, types_check=types_check
        )

        if chunk_size > 1:
            return chunks(rows, chunk_size)

        return rows
463
+
464
def query_dataframe(
        self, query, params=None, external_tables=None, query_id=None,
        settings=None, replace_nonwords=True):
    """
    *New in version 0.2.0.*

    Queries DataFrame with specified SELECT query.

    The query is executed in columnar form with column types; the column
    names become the DataFrame columns.

    :param query: query that will be sent to server.
    :param params: substitution parameters.
                   Defaults to ``None`` (no parameters or data).
    :param external_tables: external tables to send.
                            Defaults to ``None`` (no external tables).
    :param query_id: the query identifier. If no query id specified
                     ClickHouse server will generate it.
    :param settings: dictionary of query settings.
                     Defaults to ``None`` (no additional settings).
    :param replace_nonwords: boolean to replace non-word characters in
                             column names with underscores.
                             Defaults to ``True``.
    :return: pandas DataFrame.
    """

    try:
        import pandas as pd
    except ImportError:
        raise RuntimeError('Extras for NumPy must be installed')

    data, columns_with_types = self.execute(
        query, columnar=True, with_column_types=True, params=params,
        external_tables=external_tables, query_id=query_id,
        settings=settings
    )

    names = [name for name, _ in columns_with_types]
    if replace_nonwords:
        names = [re.sub(r'\W', '_', name) for name in names]

    return pd.DataFrame(dict(zip(names, data)), columns=names)
504
+
505
def insert_dataframe(
        self, query, dataframe, external_tables=None, query_id=None,
        settings=None):
    """
    *New in version 0.2.0.*

    Inserts pandas DataFrame with specified query.

    The columns to send are taken from the sample block returned by the
    server; each one must be present in ``dataframe``.

    :param query: query that will be sent to server.
    :param dataframe: pandas DataFrame.
    :param external_tables: external tables to send.
                            Defaults to ``None`` (no external tables).
    :param query_id: the query identifier. If no query id specified
                     ClickHouse server will generate it.
    :param settings: dictionary of query settings.
                     Defaults to ``None`` (no additional settings).
    :return: number of inserted rows, or ``None`` when the server sent
             no sample block.
    :raises ValueError: if the DataFrame lacks a required column.
    """

    try:
        import pandas as pd  # noqa: F401
    except ImportError:
        raise RuntimeError('Extras for NumPy must be installed')

    started_at = time()

    with self.disconnect_on_error(query, settings):
        connection = self.connection
        connection.send_query(query, query_id=query_id)
        connection.send_external_tables(external_tables)

        sample_block = self.receive_sample_block()
        inserted = None

        if sample_block:
            expected = [spec[0] for spec in sample_block.columns_with_types]

            # raise if any columns are missing from the dataframe
            missing = set(expected) - set(dataframe.columns)
            if missing:
                raise ValueError(
                    "DataFrame missing required columns: {}".format(
                        list(missing)
                    )
                )

            column_values = [dataframe[name].values for name in expected]
            inserted = self.send_data(
                sample_block, column_values, columnar=True
            )
            self.receive_end_of_query()

        self.last_query.store_elapsed(time() - started_at)
        return inserted
551
+
552
def process_ordinary_query_with_progress(
        self, query, params=None, with_column_types=False,
        external_tables=None, query_id=None,
        types_check=False, columnar=False):
    """
    Sends a non-INSERT query and returns a progress-aware result.

    Parameters, when given, are substituted into the query text before
    sending; external tables are sent right after the query.
    """
    connection = self.connection

    query_text = query
    if params is not None:
        query_text = self.substitute_params(
            query, params, connection.context
        )

    connection.send_query(query_text, query_id=query_id, params=params)
    connection.send_external_tables(
        external_tables, types_check=types_check
    )
    return self.receive_result(
        with_column_types=with_column_types, progress=True,
        columnar=columnar
    )
567
+
568
def process_ordinary_query(
        self, query, params=None, with_column_types=False,
        external_tables=None, query_id=None,
        types_check=False, columnar=False):
    """
    Sends a non-INSERT query and returns its complete result.

    Parameters, when given, are substituted into the query text before
    sending; external tables are sent right after the query.
    """
    connection = self.connection

    query_text = query
    if params is not None:
        query_text = self.substitute_params(
            query, params, connection.context
        )

    connection.send_query(query_text, query_id=query_id, params=params)
    connection.send_external_tables(
        external_tables, types_check=types_check
    )
    return self.receive_result(
        with_column_types=with_column_types, columnar=columnar
    )
582
+
583
def iter_process_ordinary_query(
        self, query, params=None, with_column_types=False,
        external_tables=None, query_id=None,
        types_check=False):
    """
    Sends a non-INSERT query and returns a row iterator over its result.

    Parameters, when given, are substituted into the query text before
    sending; external tables are sent right after the query.
    """
    connection = self.connection

    query_text = query
    if params is not None:
        query_text = self.substitute_params(
            query, params, connection.context
        )

    connection.send_query(query_text, query_id=query_id, params=params)
    connection.send_external_tables(
        external_tables, types_check=types_check
    )
    return self.iter_receive_result(with_column_types=with_column_types)
597
+
598
def process_insert_query(self, query_without_data, data,
                         external_tables=None, query_id=None,
                         types_check=False, columnar=False):
    """
    Sends an INSERT query followed by its data.

    :return: the number of rows reported by ``send_data``, or ``None``
             when the server returned no sample block.
    """
    connection = self.connection
    connection.send_query(query_without_data, query_id=query_id)
    connection.send_external_tables(
        external_tables, types_check=types_check
    )

    sample_block = self.receive_sample_block()
    if not sample_block:
        # Nothing to send; implicit None as for the original flow.
        return None

    inserted = self.send_data(
        sample_block, data, types_check=types_check, columnar=columnar
    )
    self.receive_end_of_insert_query()
    return inserted
611
+
612
def receive_sample_block(self):
    """
    Reads packets until the Data packet with the sample block arrives.

    Log packets are passed to ``log_block`` and TableColumns packets are
    skipped.

    :return: the block of the first Data packet.
    :raises: the exception carried by an Exception packet, or
             ``UnexpectedPacketFromServerError`` for any other packet
             type.
    """
    while True:
        packet = self.connection.receive_packet()
        kind = packet.type

        if kind == ServerPacketTypes.DATA:
            return packet.block

        if kind == ServerPacketTypes.EXCEPTION:
            raise packet.exception

        if kind == ServerPacketTypes.LOG:
            log_block(packet.block)
            continue

        if kind == ServerPacketTypes.TABLE_COLUMNS:
            continue

        raise errors.UnexpectedPacketFromServerError(
            self.connection.unexpected_packet_message(
                'Data, Exception, Log or TableColumns', kind
            )
        )
633
+
634
def send_data(self, sample_block, data, types_check=False, columnar=False):
    """
    Sends ``data`` to the server in blocks of ``insert_block_size``.

    :param sample_block: block whose ``columns_with_types`` describes the
                         target columns.
    :param data: rows, or columns when ``columnar`` is true.
    :param types_check: forwarded to the block constructor.
    :param columnar: selects the column-oriented block and slicer.
    :return: total of ``num_rows`` over all sent blocks.
    :raises ValueError: with ``use_numpy`` enabled and ``columnar`` false.
    :raises RuntimeError: with ``use_numpy`` enabled when the NumPy
                          helpers cannot be imported.
    """
    client_settings = self.connection.context.client_settings

    if columnar:
        block_cls = ColumnOrientedBlock
    else:
        block_cls = RowOrientedBlock

    if client_settings['use_numpy']:
        try:
            from .numpy.helpers import column_chunks as numpy_column_chunks
        except ImportError:
            raise RuntimeError('Extras for NumPy must be installed')

        if not columnar:
            raise ValueError(
                'NumPy inserts is only allowed with columnar=True'
            )
        slicer = numpy_column_chunks

    elif columnar:
        slicer = column_chunks

    else:
        slicer = chunks

    columns_with_types = sample_block.columns_with_types
    total_rows = 0

    for part in slicer(data, client_settings['insert_block_size']):
        block = block_cls(columns_with_types, part, types_check=types_check)
        self.connection.send_data(block)
        total_rows += block.num_rows

        # Starting from the specific revision there are profile events
        # sent by server in response to each inserted block
        self.receive_profile_events()

    # Empty block means end of data.
    self.connection.send_data(block_cls())
    # If enabled by revision profile events are also sent after empty block
    self.receive_profile_events()

    return total_rows
673
+
674
def receive_end_of_query(self):
    """
    Drains packets until EndOfStream is received.

    Progress is stored on the last query, Log packets are passed to
    ``log_block``, TableColumns packets are skipped and ProfileEvents
    packets store ``packet.profile_info`` on the last query.

    :raises: the exception carried by an Exception packet, or
             ``UnexpectedPacketFromServerError`` for any other packet
             type.
    """
    while True:
        packet = self.connection.receive_packet()
        kind = packet.type

        if kind == ServerPacketTypes.END_OF_STREAM:
            return

        if kind == ServerPacketTypes.EXCEPTION:
            raise packet.exception

        if kind == ServerPacketTypes.PROGRESS:
            self.last_query.store_progress(packet.progress)
            continue

        if kind == ServerPacketTypes.LOG:
            log_block(packet.block)
            continue

        if kind == ServerPacketTypes.TABLE_COLUMNS:
            continue

        if kind == ServerPacketTypes.PROFILE_EVENTS:
            self.last_query.store_profile(packet.profile_info)
            continue

        raise errors.UnexpectedPacketFromServerError(
            self.connection.unexpected_packet_message(
                'Exception, EndOfStream, Progress, TableColumns, '
                'ProfileEvents or Log', kind
            )
        )
702
+
703
def receive_end_of_insert_query(self):
    """
    Drains packets after an INSERT until EndOfStream is received.

    Log packets are passed to ``log_block`` and progress is stored on
    the last query.

    :raises: the exception carried by an Exception packet, or
             ``UnexpectedPacketFromServerError`` for any other packet
             type.
    """
    while True:
        packet = self.connection.receive_packet()
        kind = packet.type

        if kind == ServerPacketTypes.END_OF_STREAM:
            return

        if kind == ServerPacketTypes.EXCEPTION:
            raise packet.exception

        if kind == ServerPacketTypes.LOG:
            log_block(packet.block)
            continue

        if kind == ServerPacketTypes.PROGRESS:
            self.last_query.store_progress(packet.progress)
            continue

        raise errors.UnexpectedPacketFromServerError(
            self.connection.unexpected_packet_message(
                'EndOfStream, Log, Progress or Exception', kind
            )
        )
724
+
725
def receive_profile_events(self):
    """
    Waits for the ProfileEvents packet that follows an inserted block.

    Does nothing when the used server revision is below
    ``DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS_IN_INSERT``.
    Progress, Log and TimezoneUpdate packets received meanwhile are
    handled or skipped.

    :raises: the exception carried by an Exception packet, or
             ``UnexpectedPacketFromServerError`` for any other packet
             type.
    """
    minimal_revision = (
        defines.DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS_IN_INSERT
    )
    if self.connection.server_info.used_revision < minimal_revision:
        return None

    while True:
        packet = self.connection.receive_packet()
        kind = packet.type

        if kind == ServerPacketTypes.PROFILE_EVENTS:
            self.last_query.store_profile(packet.profile_info)
            return

        if kind == ServerPacketTypes.EXCEPTION:
            raise packet.exception

        if kind == ServerPacketTypes.PROGRESS:
            self.last_query.store_progress(packet.progress)
            continue

        if kind == ServerPacketTypes.LOG:
            log_block(packet.block)
            continue

        if kind == ServerPacketTypes.TIMEZONE_UPDATE:
            continue

        raise errors.UnexpectedPacketFromServerError(
            self.connection.unexpected_packet_message(
                'ProfileEvents, Progress, Log, Exception or '
                'TimezoneUpdate', kind
            )
        )
758
+
759
def cancel(self, with_column_types=False):
    """
    Sends a cancel request and returns whatever result is still read.

    :param with_column_types: forwarded to ``receive_result``.
    """
    # TODO: Add warning if already cancelled.
    connection = self.connection
    connection.send_cancel()

    # Client must still read until END_OF_STREAM packet.
    remaining = self.receive_result(with_column_types=with_column_types)
    return remaining
764
+
765
def substitute_params(self, query, params, context):
    """
    Substitutes parameters into a provided query.

    For example::

        client = Client(...)

        substituted_query = client.substitute_params(
            query='SELECT 1234, %(foo)s',
            params={'foo': 'bar'},
            context=client.connection.context
        )

        # prints: SELECT 1234, 'bar'
        print(substituted_query)

    The query is returned untouched when the ``server_side_params``
    client setting of the active connection is enabled.

    :raises ValueError: if ``params`` is not a dict.
    """
    client_settings = self.connection.context.client_settings

    # In case of server side templating we don't substitute here.
    if client_settings['server_side_params']:
        return query

    if isinstance(params, dict):
        return query % escape_params(params, context)

    raise ValueError('Parameters are expected in dict form')
791
+
792
@classmethod
def from_url(cls, url):
    """
    Return a client configured from the given URL.

    For example::

        clickhouse://[user:password]@localhost:9000/default
        clickhouses://[user:password]@localhost:9440/default

    The following URL schemes are supported:

        * clickhouse:// creates a normal TCP socket connection
        * clickhouses:// creates a SSL wrapped TCP socket connection

    Any additional querystring arguments will be passed along to
    the Connection class's initializer.
    """
    parsed = parse_url(url)
    host, options = parsed
    return cls(host, **options)