clickhouse-driver 0.2.1__cp39-cp39-win_amd64.whl → 0.2.8__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. clickhouse_driver/__init__.py +9 -9
  2. clickhouse_driver/block.py +227 -195
  3. clickhouse_driver/blockstreamprofileinfo.py +22 -22
  4. clickhouse_driver/bufferedreader.cp39-win_amd64.pyd +0 -0
  5. clickhouse_driver/bufferedwriter.cp39-win_amd64.pyd +0 -0
  6. clickhouse_driver/client.py +896 -666
  7. clickhouse_driver/clientinfo.py +119 -80
  8. clickhouse_driver/columns/arraycolumn.py +161 -150
  9. clickhouse_driver/columns/base.py +221 -147
  10. clickhouse_driver/columns/boolcolumn.py +7 -0
  11. clickhouse_driver/columns/datecolumn.py +108 -49
  12. clickhouse_driver/columns/datetimecolumn.py +202 -207
  13. clickhouse_driver/columns/decimalcolumn.py +116 -118
  14. clickhouse_driver/columns/enumcolumn.py +119 -119
  15. clickhouse_driver/columns/exceptions.py +12 -12
  16. clickhouse_driver/columns/floatcolumn.py +34 -34
  17. clickhouse_driver/columns/intcolumn.py +157 -157
  18. clickhouse_driver/columns/intervalcolumn.py +33 -33
  19. clickhouse_driver/columns/ipcolumn.py +118 -118
  20. clickhouse_driver/columns/jsoncolumn.py +37 -0
  21. clickhouse_driver/columns/largeint.cp39-win_amd64.pyd +0 -0
  22. clickhouse_driver/columns/lowcardinalitycolumn.py +142 -123
  23. clickhouse_driver/columns/mapcolumn.py +73 -58
  24. clickhouse_driver/columns/nestedcolumn.py +10 -0
  25. clickhouse_driver/columns/nothingcolumn.py +13 -13
  26. clickhouse_driver/columns/nullablecolumn.py +7 -7
  27. clickhouse_driver/columns/nullcolumn.py +15 -15
  28. clickhouse_driver/columns/numpy/base.py +47 -14
  29. clickhouse_driver/columns/numpy/boolcolumn.py +8 -0
  30. clickhouse_driver/columns/numpy/datecolumn.py +19 -12
  31. clickhouse_driver/columns/numpy/datetimecolumn.py +143 -145
  32. clickhouse_driver/columns/numpy/floatcolumn.py +24 -13
  33. clickhouse_driver/columns/numpy/intcolumn.py +43 -43
  34. clickhouse_driver/columns/numpy/lowcardinalitycolumn.py +96 -83
  35. clickhouse_driver/columns/numpy/service.py +58 -80
  36. clickhouse_driver/columns/numpy/stringcolumn.py +78 -76
  37. clickhouse_driver/columns/numpy/tuplecolumn.py +37 -0
  38. clickhouse_driver/columns/service.py +185 -131
  39. clickhouse_driver/columns/simpleaggregatefunctioncolumn.py +7 -7
  40. clickhouse_driver/columns/stringcolumn.py +73 -73
  41. clickhouse_driver/columns/tuplecolumn.py +63 -65
  42. clickhouse_driver/columns/util.py +60 -0
  43. clickhouse_driver/columns/uuidcolumn.py +64 -64
  44. clickhouse_driver/compression/__init__.py +28 -28
  45. clickhouse_driver/compression/base.py +87 -52
  46. clickhouse_driver/compression/lz4.py +21 -55
  47. clickhouse_driver/compression/lz4hc.py +9 -9
  48. clickhouse_driver/compression/zstd.py +20 -51
  49. clickhouse_driver/connection.py +784 -632
  50. clickhouse_driver/context.py +36 -36
  51. clickhouse_driver/dbapi/__init__.py +62 -62
  52. clickhouse_driver/dbapi/connection.py +99 -96
  53. clickhouse_driver/dbapi/cursor.py +370 -368
  54. clickhouse_driver/dbapi/errors.py +40 -40
  55. clickhouse_driver/dbapi/extras.py +73 -0
  56. clickhouse_driver/defines.py +55 -42
  57. clickhouse_driver/errors.py +453 -446
  58. clickhouse_driver/log.py +48 -44
  59. clickhouse_driver/numpy/block.py +8 -8
  60. clickhouse_driver/numpy/helpers.py +25 -25
  61. clickhouse_driver/numpy/result.py +123 -123
  62. clickhouse_driver/opentelemetry.py +43 -0
  63. clickhouse_driver/progress.py +38 -32
  64. clickhouse_driver/protocol.py +114 -105
  65. clickhouse_driver/queryprocessingstage.py +8 -8
  66. clickhouse_driver/reader.py +69 -69
  67. clickhouse_driver/readhelpers.py +26 -26
  68. clickhouse_driver/result.py +144 -144
  69. clickhouse_driver/settings/available.py +405 -405
  70. clickhouse_driver/settings/types.py +50 -50
  71. clickhouse_driver/settings/writer.py +34 -29
  72. clickhouse_driver/streams/compressed.py +88 -88
  73. clickhouse_driver/streams/native.py +102 -90
  74. clickhouse_driver/util/compat.py +39 -0
  75. clickhouse_driver/util/escape.py +94 -55
  76. clickhouse_driver/util/helpers.py +57 -57
  77. clickhouse_driver/varint.cp39-win_amd64.pyd +0 -0
  78. clickhouse_driver/writer.py +67 -67
  79. {clickhouse_driver-0.2.1.dist-info → clickhouse_driver-0.2.8.dist-info}/LICENSE +21 -21
  80. clickhouse_driver-0.2.8.dist-info/METADATA +201 -0
  81. clickhouse_driver-0.2.8.dist-info/RECORD +89 -0
  82. {clickhouse_driver-0.2.1.dist-info → clickhouse_driver-0.2.8.dist-info}/WHEEL +1 -1
  83. clickhouse_driver-0.2.1.dist-info/METADATA +0 -24
  84. clickhouse_driver-0.2.1.dist-info/RECORD +0 -80
  85. {clickhouse_driver-0.2.1.dist-info → clickhouse_driver-0.2.8.dist-info}/top_level.txt +0 -0
clickhouse_driver/client.py
@@ -1,666 +1,896 @@
1
- import re
2
- import ssl
3
- from contextlib import contextmanager
4
- from time import time
5
- import types
6
- from urllib.parse import urlparse, parse_qs, unquote
7
-
8
- from . import errors, defines
9
- from .block import ColumnOrientedBlock, RowOrientedBlock
10
- from .connection import Connection
11
- from .log import log_block
12
- from .protocol import ServerPacketTypes
13
- from .result import (
14
- IterQueryResult, ProgressQueryResult, QueryResult, QueryInfo
15
- )
16
- from .util.escape import escape_params
17
- from .util.helpers import column_chunks, chunks, asbool
18
-
19
-
20
- class Client(object):
21
- """
22
- Client for communication with the ClickHouse server.
23
- Single connection is established per each connected instance of the client.
24
-
25
- :param settings: Dictionary of settings that passed to every query (except
26
- for the client settings, see below). Defaults to ``None``
27
- (no additional settings). See all available settings in
28
- `ClickHouse docs
29
- <https://clickhouse.tech/docs/en/operations/settings/settings/>`_.
30
- :param \\**kwargs: All other args are passed to the
31
- :py:class:`~clickhouse_driver.connection.Connection`
32
- constructor.
33
-
34
- The following keys when passed in ``settings`` are used for configuring the
35
- client itself:
36
-
37
- * ``insert_block_size`` -- chunk size to split rows for ``INSERT``.
38
- Defaults to ``1048576``.
39
- * ``strings_as_bytes`` -- turns off string column encoding/decoding.
40
- * ``strings_encoding`` -- specifies string encoding. UTF-8 by default.
41
- * ``use_numpy`` -- Use numpy for columns reading. New in version
42
- *0.2.0*.
43
-
44
- """
45
-
46
- available_client_settings = (
47
- 'insert_block_size', # TODO: rename to max_insert_block_size
48
- 'strings_as_bytes',
49
- 'strings_encoding',
50
- 'use_numpy'
51
- )
52
-
53
- def __init__(self, *args, **kwargs):
54
- self.settings = kwargs.pop('settings', {}).copy()
55
-
56
- self.client_settings = {
57
- 'insert_block_size': int(self.settings.pop(
58
- 'insert_block_size', defines.DEFAULT_INSERT_BLOCK_SIZE,
59
- )),
60
- 'strings_as_bytes': self.settings.pop(
61
- 'strings_as_bytes', False
62
- ),
63
- 'strings_encoding': self.settings.pop(
64
- 'strings_encoding', defines.STRINGS_ENCODING
65
- ),
66
- 'use_numpy': self.settings.pop(
67
- 'use_numpy', False
68
- )
69
- }
70
-
71
- if self.client_settings['use_numpy']:
72
- try:
73
- from .numpy.result import (
74
- NumpyIterQueryResult, NumpyProgressQueryResult,
75
- NumpyQueryResult
76
- )
77
- self.query_result_cls = NumpyQueryResult
78
- self.iter_query_result_cls = NumpyIterQueryResult
79
- self.progress_query_result_cls = NumpyProgressQueryResult
80
- except ImportError:
81
- raise RuntimeError('Extras for NumPy must be installed')
82
- else:
83
- self.query_result_cls = QueryResult
84
- self.iter_query_result_cls = IterQueryResult
85
- self.progress_query_result_cls = ProgressQueryResult
86
-
87
- self.connection = Connection(*args, **kwargs)
88
- self.connection.context.settings = self.settings
89
- self.connection.context.client_settings = self.client_settings
90
- self.reset_last_query()
91
- super(Client, self).__init__()
92
-
93
- def disconnect(self):
94
- """
95
- Disconnects from the server.
96
- """
97
- self.connection.disconnect()
98
- self.reset_last_query()
99
-
100
- def reset_last_query(self):
101
- self.last_query = None
102
-
103
- def receive_result(self, with_column_types=False, progress=False,
104
- columnar=False):
105
-
106
- gen = self.packet_generator()
107
-
108
- if progress:
109
- return self.progress_query_result_cls(
110
- gen, with_column_types=with_column_types, columnar=columnar
111
- )
112
-
113
- else:
114
- result = self.query_result_cls(
115
- gen, with_column_types=with_column_types, columnar=columnar
116
- )
117
- return result.get_result()
118
-
119
- def iter_receive_result(self, with_column_types=False):
120
- gen = self.packet_generator()
121
-
122
- result = self.iter_query_result_cls(
123
- gen, with_column_types=with_column_types
124
- )
125
-
126
- for rows in result:
127
- for row in rows:
128
- yield row
129
-
130
- def packet_generator(self):
131
- while True:
132
- try:
133
- packet = self.receive_packet()
134
- if not packet:
135
- break
136
-
137
- if packet is True:
138
- continue
139
-
140
- yield packet
141
-
142
- except (Exception, KeyboardInterrupt):
143
- self.disconnect()
144
- raise
145
-
146
- def receive_packet(self):
147
- packet = self.connection.receive_packet()
148
-
149
- if packet.type == ServerPacketTypes.EXCEPTION:
150
- raise packet.exception
151
-
152
- elif packet.type == ServerPacketTypes.PROGRESS:
153
- self.last_query.store_progress(packet.progress)
154
- return packet
155
-
156
- elif packet.type == ServerPacketTypes.END_OF_STREAM:
157
- return False
158
-
159
- elif packet.type == ServerPacketTypes.DATA:
160
- return packet
161
-
162
- elif packet.type == ServerPacketTypes.TOTALS:
163
- return packet
164
-
165
- elif packet.type == ServerPacketTypes.EXTREMES:
166
- return packet
167
-
168
- elif packet.type == ServerPacketTypes.PROFILE_INFO:
169
- self.last_query.store_profile(packet.profile_info)
170
- return True
171
-
172
- else:
173
- return True
174
-
175
- def make_query_settings(self, settings):
176
- settings = dict(settings or {})
177
-
178
- # Pick client-related settings.
179
- client_settings = self.client_settings.copy()
180
- for key in self.available_client_settings:
181
- if key in settings:
182
- client_settings[key] = settings.pop(key)
183
-
184
- self.connection.context.client_settings = client_settings
185
-
186
- # The rest of settings are ClickHouse-related.
187
- query_settings = self.settings.copy()
188
- query_settings.update(settings)
189
- self.connection.context.settings = query_settings
190
-
191
- def track_current_database(self, query):
192
- query = query.strip('; ')
193
- if query.lower().startswith('use '):
194
- self.connection.database = query[4:].strip()
195
-
196
- @contextmanager
197
- def disconnect_on_error(self, query):
198
- try:
199
- yield
200
- self.track_current_database(query)
201
-
202
- except (Exception, KeyboardInterrupt):
203
- self.disconnect()
204
- raise
205
-
206
- def execute(self, query, params=None, with_column_types=False,
207
- external_tables=None, query_id=None, settings=None,
208
- types_check=False, columnar=False):
209
- """
210
- Executes query.
211
-
212
- Establishes new connection if it wasn't established yet.
213
- After query execution connection remains intact for next queries.
214
- If connection can't be reused it will be closed and new connection will
215
- be created.
216
-
217
- :param query: query that will be send to server.
218
- :param params: substitution parameters for SELECT queries and data for
219
- INSERT queries. Data for INSERT can be `list`, `tuple`
220
- or :data:`~types.GeneratorType`.
221
- Defaults to ``None`` (no parameters or data).
222
- :param with_column_types: if specified column names and types will be
223
- returned alongside with result.
224
- Defaults to ``False``.
225
- :param external_tables: external tables to send.
226
- Defaults to ``None`` (no external tables).
227
- :param query_id: the query identifier. If no query id specified
228
- ClickHouse server will generate it.
229
- :param settings: dictionary of query settings.
230
- Defaults to ``None`` (no additional settings).
231
- :param types_check: enables type checking of data for INSERT queries.
232
- Causes additional overhead. Defaults to ``False``.
233
- :param columnar: if specified the result of the SELECT query will be
234
- returned in column-oriented form.
235
- It also allows to INSERT data in columnar form.
236
- Defaults to ``False`` (row-like form).
237
-
238
- :return: * number of inserted rows for INSERT queries with data.
239
- Returning rows count from INSERT FROM SELECT is not
240
- supported.
241
- * if `with_column_types=False`: `list` of `tuples` with
242
- rows/columns.
243
- * if `with_column_types=True`: `tuple` of 2 elements:
244
- * The first element is `list` of `tuples` with
245
- rows/columns.
246
- * The second element information is about columns: names
247
- and types.
248
- """
249
-
250
- start_time = time()
251
- self.make_query_settings(settings)
252
- self.connection.force_connect()
253
- self.last_query = QueryInfo()
254
-
255
- with self.disconnect_on_error(query):
256
- # INSERT queries can use list/tuple/generator of list/tuples/dicts.
257
- # For SELECT parameters can be passed in only in dict right now.
258
- is_insert = isinstance(params, (list, tuple, types.GeneratorType))
259
-
260
- if is_insert:
261
- rv = self.process_insert_query(
262
- query, params, external_tables=external_tables,
263
- query_id=query_id, types_check=types_check,
264
- columnar=columnar
265
- )
266
- else:
267
- rv = self.process_ordinary_query(
268
- query, params=params, with_column_types=with_column_types,
269
- external_tables=external_tables,
270
- query_id=query_id, types_check=types_check,
271
- columnar=columnar
272
- )
273
- self.last_query.store_elapsed(time() - start_time)
274
- return rv
275
-
276
- def execute_with_progress(
277
- self, query, params=None, with_column_types=False,
278
- external_tables=None, query_id=None, settings=None,
279
- types_check=False, columnar=False):
280
- """
281
- Executes SELECT query with progress information.
282
- See, :ref:`execute-with-progress`.
283
-
284
- :param query: query that will be send to server.
285
- :param params: substitution parameters for SELECT queries and data for
286
- INSERT queries. Data for INSERT can be `list`, `tuple`
287
- or :data:`~types.GeneratorType`.
288
- Defaults to ``None`` (no parameters or data).
289
- :param with_column_types: if specified column names and types will be
290
- returned alongside with result.
291
- Defaults to ``False``.
292
- :param external_tables: external tables to send.
293
- Defaults to ``None`` (no external tables).
294
- :param query_id: the query identifier. If no query id specified
295
- ClickHouse server will generate it.
296
- :param settings: dictionary of query settings.
297
- Defaults to ``None`` (no additional settings).
298
- :param types_check: enables type checking of data for INSERT queries.
299
- Causes additional overhead. Defaults to ``False``.
300
- :param columnar: if specified the result will be returned in
301
- column-oriented form.
302
- Defaults to ``False`` (row-like form).
303
- :return: :ref:`progress-query-result` proxy.
304
- """
305
-
306
- self.make_query_settings(settings)
307
- self.connection.force_connect()
308
- self.last_query = QueryInfo()
309
-
310
- with self.disconnect_on_error(query):
311
- return self.process_ordinary_query_with_progress(
312
- query, params=params, with_column_types=with_column_types,
313
- external_tables=external_tables, query_id=query_id,
314
- types_check=types_check, columnar=columnar
315
- )
316
-
317
- def execute_iter(
318
- self, query, params=None, with_column_types=False,
319
- external_tables=None, query_id=None, settings=None,
320
- types_check=False):
321
- """
322
- *New in version 0.0.14.*
323
-
324
- Executes SELECT query with results streaming. See, :ref:`execute-iter`.
325
-
326
- :param query: query that will be send to server.
327
- :param params: substitution parameters for SELECT queries and data for
328
- INSERT queries. Data for INSERT can be `list`, `tuple`
329
- or :data:`~types.GeneratorType`.
330
- Defaults to ``None`` (no parameters or data).
331
- :param with_column_types: if specified column names and types will be
332
- returned alongside with result.
333
- Defaults to ``False``.
334
- :param external_tables: external tables to send.
335
- Defaults to ``None`` (no external tables).
336
- :param query_id: the query identifier. If no query id specified
337
- ClickHouse server will generate it.
338
- :param settings: dictionary of query settings.
339
- Defaults to ``None`` (no additional settings).
340
- :param types_check: enables type checking of data for INSERT queries.
341
- Causes additional overhead. Defaults to ``False``.
342
- :return: :ref:`iter-query-result` proxy.
343
- """
344
-
345
- self.make_query_settings(settings)
346
- self.connection.force_connect()
347
- self.last_query = QueryInfo()
348
-
349
- with self.disconnect_on_error(query):
350
- return self.iter_process_ordinary_query(
351
- query, params=params, with_column_types=with_column_types,
352
- external_tables=external_tables,
353
- query_id=query_id, types_check=types_check
354
- )
355
-
356
- def query_dataframe(
357
- self, query, params=None, external_tables=None, query_id=None,
358
- settings=None):
359
- """
360
- *New in version 0.2.0.*
361
-
362
- Queries DataFrame with specified SELECT query.
363
-
364
- :param query: query that will be send to server.
365
- :param params: substitution parameters.
366
- Defaults to ``None`` (no parameters or data).
367
- :param external_tables: external tables to send.
368
- Defaults to ``None`` (no external tables).
369
- :param query_id: the query identifier. If no query id specified
370
- ClickHouse server will generate it.
371
- :param settings: dictionary of query settings.
372
- Defaults to ``None`` (no additional settings).
373
- :return: pandas DataFrame.
374
- """
375
-
376
- try:
377
- import pandas as pd
378
- except ImportError:
379
- raise RuntimeError('Extras for NumPy must be installed')
380
-
381
- data, columns = self.execute(
382
- query, columnar=True, with_column_types=True, params=params,
383
- external_tables=external_tables, query_id=query_id,
384
- settings=settings
385
- )
386
-
387
- return pd.DataFrame(
388
- {re.sub(r'\W', '_', col[0]): d for d, col in zip(data, columns)}
389
- )
390
-
391
- def insert_dataframe(
392
- self, query, dataframe, transpose=True, external_tables=None,
393
- query_id=None, settings=None):
394
- """
395
- *New in version 0.2.0.*
396
-
397
- Inserts pandas DataFrame with specified query.
398
-
399
- :param query: query that will be send to server.
400
- :param dataframe: pandas DataFrame.
401
- :param transpose: whether or not transpose DataFrame before sending.
402
- This is necessary action as DataFrame can be sent in
403
- columnar form. If DataFrame is already in columnar
404
- form set this parameter to ``False``.
405
- Defaults to ``True``.
406
- :param external_tables: external tables to send.
407
- Defaults to ``None`` (no external tables).
408
- :param query_id: the query identifier. If no query id specified
409
- ClickHouse server will generate it.
410
- :param settings: dictionary of query settings.
411
- Defaults to ``None`` (no additional settings).
412
- :return: number of inserted rows.
413
- """
414
-
415
- try:
416
- import pandas as pd # noqa: F401
417
- except ImportError:
418
- raise RuntimeError('Extras for NumPy must be installed')
419
-
420
- frame = dataframe.transpose() if transpose else dataframe
421
- columns = list(frame.values)
422
-
423
- return self.execute(
424
- query, columns, columnar=True, external_tables=external_tables,
425
- query_id=query_id, settings=settings
426
- )
427
-
428
- def process_ordinary_query_with_progress(
429
- self, query, params=None, with_column_types=False,
430
- external_tables=None, query_id=None,
431
- types_check=False, columnar=False):
432
-
433
- if params is not None:
434
- query = self.substitute_params(query, params)
435
-
436
- self.connection.send_query(query, query_id=query_id)
437
- self.connection.send_external_tables(external_tables,
438
- types_check=types_check)
439
- return self.receive_result(with_column_types=with_column_types,
440
- progress=True, columnar=columnar)
441
-
442
- def process_ordinary_query(
443
- self, query, params=None, with_column_types=False,
444
- external_tables=None, query_id=None,
445
- types_check=False, columnar=False):
446
-
447
- if params is not None:
448
- query = self.substitute_params(query, params)
449
-
450
- self.connection.send_query(query, query_id=query_id)
451
- self.connection.send_external_tables(external_tables,
452
- types_check=types_check)
453
- return self.receive_result(with_column_types=with_column_types,
454
- columnar=columnar)
455
-
456
- def iter_process_ordinary_query(
457
- self, query, params=None, with_column_types=False,
458
- external_tables=None, query_id=None,
459
- types_check=False):
460
-
461
- if params is not None:
462
- query = self.substitute_params(query, params)
463
-
464
- self.connection.send_query(query, query_id=query_id)
465
- self.connection.send_external_tables(external_tables,
466
- types_check=types_check)
467
- return self.iter_receive_result(with_column_types=with_column_types)
468
-
469
- def process_insert_query(self, query_without_data, data,
470
- external_tables=None, query_id=None,
471
- types_check=False, columnar=False):
472
- self.connection.send_query(query_without_data, query_id=query_id)
473
- self.connection.send_external_tables(external_tables,
474
- types_check=types_check)
475
-
476
- sample_block = self.receive_sample_block()
477
- if sample_block:
478
- rv = self.send_data(sample_block, data,
479
- types_check=types_check, columnar=columnar)
480
- self.receive_end_of_query()
481
- return rv
482
-
483
- def receive_sample_block(self):
484
- while True:
485
- packet = self.connection.receive_packet()
486
-
487
- if packet.type == ServerPacketTypes.DATA:
488
- return packet.block
489
-
490
- elif packet.type == ServerPacketTypes.EXCEPTION:
491
- raise packet.exception
492
-
493
- elif packet.type == ServerPacketTypes.LOG:
494
- log_block(packet.block)
495
-
496
- elif packet.type == ServerPacketTypes.TABLE_COLUMNS:
497
- pass
498
-
499
- else:
500
- message = self.connection.unexpected_packet_message(
501
- 'Data, Exception, Log or TableColumns', packet.type
502
- )
503
- raise errors.UnexpectedPacketFromServerError(message)
504
-
505
- def send_data(self, sample_block, data, types_check=False, columnar=False):
506
- inserted_rows = 0
507
-
508
- client_settings = self.connection.context.client_settings
509
- block_cls = ColumnOrientedBlock if columnar else RowOrientedBlock
510
-
511
- if client_settings['use_numpy']:
512
- try:
513
- from .numpy.helpers import column_chunks as numpy_column_chunks
514
-
515
- if columnar:
516
- slicer = numpy_column_chunks
517
- else:
518
- raise ValueError(
519
- 'NumPy inserts is only allowed with columnar=True'
520
- )
521
-
522
- except ImportError:
523
- raise RuntimeError('Extras for NumPy must be installed')
524
-
525
- else:
526
- slicer = column_chunks if columnar else chunks
527
-
528
- for chunk in slicer(data, client_settings['insert_block_size']):
529
- block = block_cls(sample_block.columns_with_types, chunk,
530
- types_check=types_check)
531
- self.connection.send_data(block)
532
- inserted_rows += block.num_rows
533
-
534
- # Empty block means end of data.
535
- self.connection.send_data(block_cls())
536
- return inserted_rows
537
-
538
- def receive_end_of_query(self):
539
- while True:
540
- packet = self.connection.receive_packet()
541
-
542
- if packet.type == ServerPacketTypes.END_OF_STREAM:
543
- break
544
-
545
- elif packet.type == ServerPacketTypes.EXCEPTION:
546
- raise packet.exception
547
-
548
- elif packet.type == ServerPacketTypes.LOG:
549
- log_block(packet.block)
550
-
551
- elif packet.type == ServerPacketTypes.TABLE_COLUMNS:
552
- pass
553
-
554
- else:
555
- message = self.connection.unexpected_packet_message(
556
- 'Exception, EndOfStream or Log', packet.type
557
- )
558
- raise errors.UnexpectedPacketFromServerError(message)
559
-
560
- def cancel(self, with_column_types=False):
561
- # TODO: Add warning if already cancelled.
562
- self.connection.send_cancel()
563
- # Client must still read until END_OF_STREAM packet.
564
- return self.receive_result(with_column_types=with_column_types)
565
-
566
- def substitute_params(self, query, params):
567
- if not isinstance(params, dict):
568
- raise ValueError('Parameters are expected in dict form')
569
-
570
- escaped = escape_params(params)
571
- return query % escaped
572
-
573
- @classmethod
574
- def from_url(cls, url):
575
- """
576
- Return a client configured from the given URL.
577
-
578
- For example::
579
-
580
- clickhouse://[user:password]@localhost:9000/default
581
- clickhouses://[user:password]@localhost:9440/default
582
-
583
- Three URL schemes are supported:
584
- clickhouse:// creates a normal TCP socket connection
585
- clickhouses:// creates a SSL wrapped TCP socket connection
586
-
587
- Any additional querystring arguments will be passed along to
588
- the Connection class's initializer.
589
- """
590
- url = urlparse(url)
591
-
592
- settings = {}
593
- kwargs = {}
594
-
595
- host = url.hostname
596
-
597
- if url.port is not None:
598
- kwargs['port'] = url.port
599
-
600
- path = url.path.replace('/', '', 1)
601
- if path:
602
- kwargs['database'] = path
603
-
604
- if url.username is not None:
605
- kwargs['user'] = unquote(url.username)
606
-
607
- if url.password is not None:
608
- kwargs['password'] = unquote(url.password)
609
-
610
- if url.scheme == 'clickhouses':
611
- kwargs['secure'] = True
612
-
613
- compression_algs = {'lz4', 'lz4hc', 'zstd'}
614
- timeouts = {
615
- 'connect_timeout',
616
- 'send_receive_timeout',
617
- 'sync_request_timeout'
618
- }
619
-
620
- for name, value in parse_qs(url.query).items():
621
- if not value or not len(value):
622
- continue
623
-
624
- value = value[0]
625
-
626
- if name == 'compression':
627
- value = value.lower()
628
- if value in compression_algs:
629
- kwargs[name] = value
630
- else:
631
- kwargs[name] = asbool(value)
632
-
633
- elif name == 'secure':
634
- kwargs[name] = asbool(value)
635
-
636
- elif name == 'use_numpy':
637
- settings[name] = asbool(value)
638
-
639
- elif name == 'client_name':
640
- kwargs[name] = value
641
-
642
- elif name in timeouts:
643
- kwargs[name] = float(value)
644
-
645
- elif name == 'compress_block_size':
646
- kwargs[name] = int(value)
647
-
648
- elif name == 'settings_is_important':
649
- kwargs[name] = asbool(value)
650
-
651
- # ssl
652
- elif name == 'verify':
653
- kwargs[name] = asbool(value)
654
- elif name == 'ssl_version':
655
- kwargs[name] = getattr(ssl, value)
656
- elif name in ['ca_certs', 'ciphers', 'keyfile', 'certfile']:
657
- kwargs[name] = value
658
- elif name == 'alt_hosts':
659
- kwargs['alt_hosts'] = value
660
- else:
661
- settings[name] = value
662
-
663
- if settings:
664
- kwargs['settings'] = settings
665
-
666
- return cls(host, **kwargs)
1
+ import re
2
+ import ssl
3
+ from collections import deque
4
+ from contextlib import contextmanager
5
+ from time import time
6
+ import types
7
+ from urllib.parse import urlparse, parse_qs, unquote
8
+
9
+ from . import errors, defines
10
+ from .block import ColumnOrientedBlock, RowOrientedBlock
11
+ from .connection import Connection
12
+ from .log import log_block
13
+ from .protocol import ServerPacketTypes
14
+ from .result import (
15
+ IterQueryResult, ProgressQueryResult, QueryResult, QueryInfo
16
+ )
17
+ from .util.escape import escape_params
18
+ from .util.helpers import column_chunks, chunks, asbool
19
+
20
+
21
+ class Client(object):
22
+ """
23
+ Client for communication with the ClickHouse server.
24
+ Single connection is established per each connected instance of the client.
25
+
26
+ :param settings: Dictionary of settings that passed to every query (except
27
+ for the client settings, see below). Defaults to ``None``
28
+ (no additional settings). See all available settings in
29
+ `ClickHouse docs
30
+ <https://clickhouse.com/docs/en/operations/settings/settings/>`_.
31
+ :param \\**kwargs: All other args are passed to the
32
+ :py:class:`~clickhouse_driver.connection.Connection`
33
+ constructor.
34
+
35
+ The following keys when passed in ``settings`` are used for configuring the
36
+ client itself:
37
+
38
+ * ``insert_block_size`` -- chunk size to split rows for ``INSERT``.
39
+ Defaults to ``1048576``.
40
+ * ``strings_as_bytes`` -- turns off string column encoding/decoding.
41
+ * ``strings_encoding`` -- specifies string encoding. UTF-8 by default.
42
+ * ``use_numpy`` -- Use NumPy for columns reading. New in version
43
+ *0.2.0*.
44
+ * ``opentelemetry_traceparent`` -- OpenTelemetry traceparent header as
45
+ described by W3C Trace Context recommendation.
46
+ New in version *0.2.2*.
47
+ * ``opentelemetry_tracestate`` -- OpenTelemetry tracestate header as
48
+ described by W3C Trace Context recommendation.
49
+ New in version *0.2.2*.
50
+ * ``quota_key`` -- A string to differentiate quotas when the user have
51
+ keyed quotas configured on server.
52
+ New in version *0.2.3*.
53
+ * ``input_format_null_as_default`` -- Initialize null fields with
54
+ default values if data type of this field is not
55
+ nullable. Does not work for NumPy. Default: False.
56
+ New in version *0.2.4*.
57
+ * ``round_robin`` -- If ``alt_hosts`` are provided the query will be
58
+ executed on host picked with round-robin algorithm.
59
+ New in version *0.2.5*.
60
+ * ``namedtuple_as_json`` -- Controls named tuple and nested types
61
+ deserialization. To interpret these column alongside
62
+ with ``allow_experimental_object_type=1`` as Python
63
+ tuple set ``namedtuple_as_json`` to ``False``.
64
+ Default: True.
65
+ New in version *0.2.6*.
66
+ * ``server_side_params`` -- Species on which side query parameters
67
+ should be rendered into placeholders.
68
+ Default: False. Means that parameters are rendered
69
+ on driver's side.
70
+ New in version *0.2.7*.
71
+ """
72
+
73
+ available_client_settings = (
74
+ 'insert_block_size', # TODO: rename to max_insert_block_size
75
+ 'strings_as_bytes',
76
+ 'strings_encoding',
77
+ 'use_numpy',
78
+ 'opentelemetry_traceparent',
79
+ 'opentelemetry_tracestate',
80
+ 'quota_key',
81
+ 'input_format_null_as_default',
82
+ 'namedtuple_as_json',
83
+ 'server_side_params'
84
+ )
85
+
86
def __init__(self, *args, **kwargs):
    """
    Create the client and its connection pool.

    ``settings`` and ``round_robin`` are consumed from ``kwargs`` here.
    All remaining positional and keyword arguments are passed to
    ``Connection``.

    :raises RuntimeError: if ``use_numpy`` is enabled but the NumPy result
                          classes cannot be imported.
    """
    # Work on a private copy: the ``pop`` calls below must not mutate the
    # dictionary passed in by the caller.
    self.settings = (kwargs.pop('settings', None) or {}).copy()

    # Driver-level options are moved out of ``self.settings``; what is left
    # there is later assigned to the connection context as query settings.
    self.client_settings = {
        'insert_block_size': int(self.settings.pop(
            'insert_block_size', defines.DEFAULT_INSERT_BLOCK_SIZE,
        )),
        'strings_as_bytes': self.settings.pop(
            'strings_as_bytes', False
        ),
        'strings_encoding': self.settings.pop(
            'strings_encoding', defines.STRINGS_ENCODING
        ),
        'use_numpy': self.settings.pop(
            'use_numpy', False
        ),
        'opentelemetry_traceparent': self.settings.pop(
            'opentelemetry_traceparent', None
        ),
        'opentelemetry_tracestate': self.settings.pop(
            'opentelemetry_tracestate', ''
        ),
        'quota_key': self.settings.pop(
            'quota_key', ''
        ),
        'input_format_null_as_default': self.settings.pop(
            'input_format_null_as_default', False
        ),
        'namedtuple_as_json': self.settings.pop(
            'namedtuple_as_json', True
        ),
        'server_side_params': self.settings.pop(
            'server_side_params', False
        )
    }

    if self.client_settings['use_numpy']:
        try:
            # Imported lazily so the NumPy classes are required only when
            # ``use_numpy`` is requested.
            from .numpy.result import (
                NumpyIterQueryResult, NumpyProgressQueryResult,
                NumpyQueryResult
            )
            self.query_result_cls = NumpyQueryResult
            self.iter_query_result_cls = NumpyIterQueryResult
            self.progress_query_result_cls = NumpyProgressQueryResult
        except ImportError:
            raise RuntimeError('Extras for NumPy must be installed')
    else:
        self.query_result_cls = QueryResult
        self.iter_query_result_cls = IterQueryResult
        self.progress_query_result_cls = ProgressQueryResult

    # ``round_robin`` is removed before the first Connection is built;
    # ``alt_hosts`` (if given) is still in ``kwargs`` at that point.
    round_robin = kwargs.pop('round_robin', False)
    self.connections = deque([Connection(*args, **kwargs)])

    if round_robin and 'alt_hosts' in kwargs:
        # One extra Connection per alternative host; each one reuses the
        # original arguments with only host and port replaced.
        alt_hosts = kwargs.pop('alt_hosts')
        for host in alt_hosts.split(','):
            url = urlparse('clickhouse://' + host)

            connection_kwargs = kwargs.copy()
            num_args = len(args)
            if num_args >= 2:
                # host and port as positional arguments
                connection_args = (url.hostname, url.port) + args[2:]
            elif num_args >= 1:
                # host as positional and port as keyword argument
                connection_args = (url.hostname, ) + args[1:]
                connection_kwargs['port'] = url.port
            else:
                # host and port as keyword arguments
                connection_args = tuple()
                connection_kwargs['host'] = url.hostname
                connection_kwargs['port'] = url.port

            connection = Connection(*connection_args, **connection_kwargs)
            self.connections.append(connection)

    # Select the first connection from the pool as the active one.
    self.connection = self.get_connection()
    self.reset_last_query()
    super(Client, self).__init__()
167
+
168
+ def __enter__(self):
169
+ return self
170
+
171
+ def __exit__(self, exc_type, exc_val, exc_tb):
172
+ self.disconnect()
173
+
174
+ def get_connection(self):
175
+ if hasattr(self, 'connection'):
176
+ self.connections.append(self.connection)
177
+
178
+ connection = self.connections.popleft()
179
+
180
+ connection.context.settings = self.settings
181
+ connection.context.client_settings = self.client_settings
182
+ return connection
183
+
184
+ def disconnect(self):
185
+ self.disconnect_connection()
186
+ for connection in self.connections:
187
+ connection.disconnect()
188
+
189
+ def disconnect_connection(self):
190
+ """
191
+ Disconnects from the server.
192
+ """
193
+ self.connection.disconnect()
194
+ self.reset_last_query()
195
+
196
+ def reset_last_query(self):
197
+ self.last_query = None
198
+
199
+ def receive_result(self, with_column_types=False, progress=False,
200
+ columnar=False):
201
+
202
+ gen = self.packet_generator()
203
+
204
+ if progress:
205
+ return self.progress_query_result_cls(
206
+ gen, with_column_types=with_column_types, columnar=columnar
207
+ )
208
+
209
+ else:
210
+ result = self.query_result_cls(
211
+ gen, with_column_types=with_column_types, columnar=columnar
212
+ )
213
+ return result.get_result()
214
+
215
+ def iter_receive_result(self, with_column_types=False):
216
+ gen = self.packet_generator()
217
+
218
+ result = self.iter_query_result_cls(
219
+ gen, with_column_types=with_column_types
220
+ )
221
+
222
+ for rows in result:
223
+ for row in rows:
224
+ yield row
225
+
226
+ def packet_generator(self):
227
+ while True:
228
+ try:
229
+ packet = self.receive_packet()
230
+ if not packet:
231
+ break
232
+
233
+ if packet is True:
234
+ continue
235
+
236
+ yield packet
237
+
238
+ except (Exception, KeyboardInterrupt):
239
+ self.disconnect()
240
+ raise
241
+
242
def receive_packet(self):
    """
    Read one packet from the connection and classify it.

    :return: the packet for PROGRESS, DATA, TOTALS and EXTREMES packets,
             ``False`` for END_OF_STREAM, ``True`` for PROFILE_INFO and any
             other packet type.
    :raises: the exception carried by an EXCEPTION packet.
    """
    packet = self.connection.receive_packet()

    if packet.type == ServerPacketTypes.EXCEPTION:
        raise packet.exception

    if packet.type == ServerPacketTypes.PROGRESS:
        self.last_query.store_progress(packet.progress)
        return packet

    if packet.type == ServerPacketTypes.END_OF_STREAM:
        return False

    if packet.type == ServerPacketTypes.DATA:
        return packet

    if packet.type == ServerPacketTypes.TOTALS:
        return packet

    if packet.type == ServerPacketTypes.EXTREMES:
        return packet

    if packet.type == ServerPacketTypes.PROFILE_INFO:
        self.last_query.store_profile(packet.profile_info)

    # PROFILE_INFO and every unlisted packet type carry no result data.
    return True
270
+
271
+ def make_query_settings(self, settings):
272
+ settings = dict(settings or {})
273
+
274
+ # Pick client-related settings.
275
+ client_settings = self.client_settings.copy()
276
+ for key in self.available_client_settings:
277
+ if key in settings:
278
+ client_settings[key] = settings.pop(key)
279
+
280
+ self.connection.context.client_settings = client_settings
281
+
282
+ # The rest of settings are ClickHouse-related.
283
+ query_settings = self.settings.copy()
284
+ query_settings.update(settings)
285
+ self.connection.context.settings = query_settings
286
+
287
+ def track_current_database(self, query):
288
+ query = query.strip('; ')
289
+ if query.lower().startswith('use '):
290
+ self.connection.database = query[4:].strip()
291
+
292
def establish_connection(self, settings):
    """
    Select a connection, apply ``settings`` to it and force a connect.

    Each known connection is tried at most once. ``SocketTimeoutError`` and
    ``NetworkError`` move on to the next connection and are re-raised only
    when the last attempt fails.
    """
    # Attempts = pooled connections plus the currently selected one, which
    # get_connection() appends back to the pool before picking the next.
    num_connections = len(self.connections)
    if hasattr(self, 'connection'):
        num_connections += 1

    for i in range(num_connections):
        try:
            self.connection = self.get_connection()
            self.make_query_settings(settings)
            self.connection.force_connect()
            self.last_query = QueryInfo()

        except (errors.SocketTimeoutError, errors.NetworkError):
            # More connections left: try the next one.
            if i < num_connections - 1:
                continue
            raise

        # Connected successfully: stop trying.
        return
310
+
311
@contextmanager
def disconnect_on_error(self, query, settings):
    """
    Context manager wrapping a query execution.

    It establishes a connection before the managed block runs and, after
    the block finishes without error, records the current database if
    ``query`` was a ``USE`` statement. On any ``Exception`` or
    ``KeyboardInterrupt`` (raised here or inside the block) the client is
    disconnected and the error is re-raised.
    """
    try:
        self.establish_connection(settings)

        yield

        self.track_current_database(query)

    except (Exception, KeyboardInterrupt):
        self.disconnect()
        raise
323
+
324
+ def execute(self, query, params=None, with_column_types=False,
325
+ external_tables=None, query_id=None, settings=None,
326
+ types_check=False, columnar=False):
327
+ """
328
+ Executes query.
329
+
330
+ Establishes new connection if it wasn't established yet.
331
+ After query execution connection remains intact for next queries.
332
+ If connection can't be reused it will be closed and new connection will
333
+ be created.
334
+
335
+ :param query: query that will be send to server.
336
+ :param params: substitution parameters for SELECT queries and data for
337
+ INSERT queries. Data for INSERT can be `list`, `tuple`
338
+ or :data:`~types.GeneratorType`.
339
+ Defaults to ``None`` (no parameters or data).
340
+ :param with_column_types: if specified column names and types will be
341
+ returned alongside with result.
342
+ Defaults to ``False``.
343
+ :param external_tables: external tables to send.
344
+ Defaults to ``None`` (no external tables).
345
+ :param query_id: the query identifier. If no query id specified
346
+ ClickHouse server will generate it.
347
+ :param settings: dictionary of query settings.
348
+ Defaults to ``None`` (no additional settings).
349
+ :param types_check: enables type checking of data for INSERT queries.
350
+ Causes additional overhead. Defaults to ``False``.
351
+ :param columnar: if specified the result of the SELECT query will be
352
+ returned in column-oriented form.
353
+ It also allows to INSERT data in columnar form.
354
+ Defaults to ``False`` (row-like form).
355
+
356
+ :return: * number of inserted rows for INSERT queries with data.
357
+ Returning rows count from INSERT FROM SELECT is not
358
+ supported.
359
+ * if `with_column_types=False`: `list` of `tuples` with
360
+ rows/columns.
361
+ * if `with_column_types=True`: `tuple` of 2 elements:
362
+ * The first element is `list` of `tuples` with
363
+ rows/columns.
364
+ * The second element information is about columns: names
365
+ and types.
366
+ """
367
+
368
+ start_time = time()
369
+
370
+ with self.disconnect_on_error(query, settings):
371
+ # INSERT queries can use list/tuple/generator of list/tuples/dicts.
372
+ # For SELECT parameters can be passed in only in dict right now.
373
+ is_insert = isinstance(params, (list, tuple, types.GeneratorType))
374
+
375
+ if is_insert:
376
+ rv = self.process_insert_query(
377
+ query, params, external_tables=external_tables,
378
+ query_id=query_id, types_check=types_check,
379
+ columnar=columnar
380
+ )
381
+ else:
382
+ rv = self.process_ordinary_query(
383
+ query, params=params, with_column_types=with_column_types,
384
+ external_tables=external_tables,
385
+ query_id=query_id, types_check=types_check,
386
+ columnar=columnar
387
+ )
388
+ self.last_query.store_elapsed(time() - start_time)
389
+ return rv
390
+
391
+ def execute_with_progress(
392
+ self, query, params=None, with_column_types=False,
393
+ external_tables=None, query_id=None, settings=None,
394
+ types_check=False, columnar=False):
395
+ """
396
+ Executes SELECT query with progress information.
397
+ See, :ref:`execute-with-progress`.
398
+
399
+ :param query: query that will be send to server.
400
+ :param params: substitution parameters for SELECT queries and data for
401
+ INSERT queries. Data for INSERT can be `list`, `tuple`
402
+ or :data:`~types.GeneratorType`.
403
+ Defaults to ``None`` (no parameters or data).
404
+ :param with_column_types: if specified column names and types will be
405
+ returned alongside with result.
406
+ Defaults to ``False``.
407
+ :param external_tables: external tables to send.
408
+ Defaults to ``None`` (no external tables).
409
+ :param query_id: the query identifier. If no query id specified
410
+ ClickHouse server will generate it.
411
+ :param settings: dictionary of query settings.
412
+ Defaults to ``None`` (no additional settings).
413
+ :param types_check: enables type checking of data for INSERT queries.
414
+ Causes additional overhead. Defaults to ``False``.
415
+ :param columnar: if specified the result will be returned in
416
+ column-oriented form.
417
+ Defaults to ``False`` (row-like form).
418
+ :return: :ref:`progress-query-result` proxy.
419
+ """
420
+
421
+ with self.disconnect_on_error(query, settings):
422
+ return self.process_ordinary_query_with_progress(
423
+ query, params=params, with_column_types=with_column_types,
424
+ external_tables=external_tables, query_id=query_id,
425
+ types_check=types_check, columnar=columnar
426
+ )
427
+
428
+ def execute_iter(
429
+ self, query, params=None, with_column_types=False,
430
+ external_tables=None, query_id=None, settings=None,
431
+ types_check=False, chunk_size=1):
432
+ """
433
+ *New in version 0.0.14.*
434
+
435
+ Executes SELECT query with results streaming. See, :ref:`execute-iter`.
436
+
437
+ :param query: query that will be send to server.
438
+ :param params: substitution parameters for SELECT queries and data for
439
+ INSERT queries. Data for INSERT can be `list`, `tuple`
440
+ or :data:`~types.GeneratorType`.
441
+ Defaults to ``None`` (no parameters or data).
442
+ :param with_column_types: if specified column names and types will be
443
+ returned alongside with result.
444
+ Defaults to ``False``.
445
+ :param external_tables: external tables to send.
446
+ Defaults to ``None`` (no external tables).
447
+ :param query_id: the query identifier. If no query id specified
448
+ ClickHouse server will generate it.
449
+ :param settings: dictionary of query settings.
450
+ Defaults to ``None`` (no additional settings).
451
+ :param types_check: enables type checking of data for INSERT queries.
452
+ Causes additional overhead. Defaults to ``False``.
453
+ :param chunk_size: chunk query results.
454
+ :return: :ref:`iter-query-result` proxy.
455
+ """
456
+ with self.disconnect_on_error(query, settings):
457
+ rv = self.iter_process_ordinary_query(
458
+ query, params=params, with_column_types=with_column_types,
459
+ external_tables=external_tables,
460
+ query_id=query_id, types_check=types_check
461
+ )
462
+ return chunks(rv, chunk_size) if chunk_size > 1 else rv
463
+
464
+ def query_dataframe(
465
+ self, query, params=None, external_tables=None, query_id=None,
466
+ settings=None, replace_nonwords=True):
467
+ """
468
+ *New in version 0.2.0.*
469
+
470
+ Queries DataFrame with specified SELECT query.
471
+
472
+ :param query: query that will be send to server.
473
+ :param params: substitution parameters.
474
+ Defaults to ``None`` (no parameters or data).
475
+ :param external_tables: external tables to send.
476
+ Defaults to ``None`` (no external tables).
477
+ :param query_id: the query identifier. If no query id specified
478
+ ClickHouse server will generate it.
479
+ :param settings: dictionary of query settings.
480
+ Defaults to ``None`` (no additional settings).
481
+ :param replace_nonwords: boolean to replace non-words in column names
482
+ to underscores. Defaults to ``True``.
483
+ :return: pandas DataFrame.
484
+ """
485
+
486
+ try:
487
+ import pandas as pd
488
+ except ImportError:
489
+ raise RuntimeError('Extras for NumPy must be installed')
490
+
491
+ data, columns = self.execute(
492
+ query, columnar=True, with_column_types=True, params=params,
493
+ external_tables=external_tables, query_id=query_id,
494
+ settings=settings
495
+ )
496
+
497
+ columns = [name for name, type_ in columns]
498
+ if replace_nonwords:
499
+ columns = [re.sub(r'\W', '_', x) for x in columns]
500
+
501
+ return pd.DataFrame(
502
+ {col: d for d, col in zip(data, columns)}, columns=columns
503
+ )
504
+
505
def insert_dataframe(
        self, query, dataframe, external_tables=None, query_id=None,
        settings=None):
    """
    *New in version 0.2.0.*

    Inserts pandas DataFrame with specified query.

    :param query: query that will be sent to server.
    :param dataframe: pandas DataFrame.
    :param external_tables: external tables to send.
                            Defaults to ``None`` (no external tables).
    :param query_id: the query identifier. If no query id specified
                     ClickHouse server will generate it.
    :param settings: dictionary of query settings.
                     Defaults to ``None`` (no additional settings).
    :return: number of inserted rows, or ``None`` when no sample block was
             received.
    :raises RuntimeError: if pandas cannot be imported.
    :raises ValueError: if the DataFrame lacks a column named in the
                        sample block.
    """

    try:
        # Imported only to verify that pandas is available.
        import pandas as pd  # noqa: F401
    except ImportError:
        raise RuntimeError('Extras for NumPy must be installed')

    start_time = time()

    with self.disconnect_on_error(query, settings):
        self.connection.send_query(query, query_id=query_id)
        self.connection.send_external_tables(external_tables)

        # The sample block supplies the expected column names.
        sample_block = self.receive_sample_block()
        rv = None
        if sample_block:
            columns = [x[0] for x in sample_block.columns_with_types]
            # raise if any columns are missing from the dataframe
            diff = set(columns) - set(dataframe.columns)
            if len(diff):
                msg = "DataFrame missing required columns: {}"
                raise ValueError(msg.format(list(diff)))

            # Column values are sent in the order given by the sample block.
            data = [dataframe[column].values for column in columns]
            rv = self.send_data(sample_block, data, columnar=True)
            self.receive_end_of_query()

        self.last_query.store_elapsed(time() - start_time)
        return rv
551
+
552
+ def process_ordinary_query_with_progress(
553
+ self, query, params=None, with_column_types=False,
554
+ external_tables=None, query_id=None,
555
+ types_check=False, columnar=False):
556
+
557
+ if params is not None:
558
+ query = self.substitute_params(
559
+ query, params, self.connection.context
560
+ )
561
+
562
+ self.connection.send_query(query, query_id=query_id, params=params)
563
+ self.connection.send_external_tables(external_tables,
564
+ types_check=types_check)
565
+ return self.receive_result(with_column_types=with_column_types,
566
+ progress=True, columnar=columnar)
567
+
568
+ def process_ordinary_query(
569
+ self, query, params=None, with_column_types=False,
570
+ external_tables=None, query_id=None,
571
+ types_check=False, columnar=False):
572
+
573
+ if params is not None:
574
+ query = self.substitute_params(
575
+ query, params, self.connection.context
576
+ )
577
+ self.connection.send_query(query, query_id=query_id, params=params)
578
+ self.connection.send_external_tables(external_tables,
579
+ types_check=types_check)
580
+ return self.receive_result(with_column_types=with_column_types,
581
+ columnar=columnar)
582
+
583
+ def iter_process_ordinary_query(
584
+ self, query, params=None, with_column_types=False,
585
+ external_tables=None, query_id=None,
586
+ types_check=False):
587
+
588
+ if params is not None:
589
+ query = self.substitute_params(
590
+ query, params, self.connection.context
591
+ )
592
+
593
+ self.connection.send_query(query, query_id=query_id, params=params)
594
+ self.connection.send_external_tables(external_tables,
595
+ types_check=types_check)
596
+ return self.iter_receive_result(with_column_types=with_column_types)
597
+
598
+ def process_insert_query(self, query_without_data, data,
599
+ external_tables=None, query_id=None,
600
+ types_check=False, columnar=False):
601
+ self.connection.send_query(query_without_data, query_id=query_id)
602
+ self.connection.send_external_tables(external_tables,
603
+ types_check=types_check)
604
+ sample_block = self.receive_sample_block()
605
+
606
+ if sample_block:
607
+ rv = self.send_data(sample_block, data,
608
+ types_check=types_check, columnar=columnar)
609
+ self.receive_end_of_insert_query()
610
+ return rv
611
+
612
def receive_sample_block(self):
    """
    Read packets until a DATA packet arrives and return its block.

    LOG packets are passed to ``log_block`` and TABLE_COLUMNS packets are
    ignored.

    :raises: the exception carried by an EXCEPTION packet.
    :raises errors.UnexpectedPacketFromServerError: for any other type.
    """
    while True:
        packet = self.connection.receive_packet()

        if packet.type == ServerPacketTypes.DATA:
            return packet.block

        if packet.type == ServerPacketTypes.EXCEPTION:
            raise packet.exception

        if packet.type == ServerPacketTypes.LOG:
            log_block(packet.block)
            continue

        if packet.type == ServerPacketTypes.TABLE_COLUMNS:
            continue

        message = self.connection.unexpected_packet_message(
            'Data, Exception, Log or TableColumns', packet.type
        )
        raise errors.UnexpectedPacketFromServerError(message)
633
+
634
def send_data(self, sample_block, data, types_check=False, columnar=False):
    """
    Send ``data`` to the server in blocks and return the number of rows.

    :param sample_block: block whose ``columns_with_types`` describes the
                         columns of every block sent.
    :param data: rows, or columns when ``columnar`` is true.
    :param types_check: forwarded to the block constructor.
    :param columnar: selects the column-oriented block class and slicer.
    :return: sum of ``num_rows`` over all blocks sent.
    :raises ValueError: if ``use_numpy`` is enabled with ``columnar=False``.
    :raises RuntimeError: if the NumPy helpers cannot be imported.
    """
    inserted_rows = 0

    client_settings = self.connection.context.client_settings
    block_cls = ColumnOrientedBlock if columnar else RowOrientedBlock

    # Pick the function that splits ``data`` into insert-sized chunks.
    if client_settings['use_numpy']:
        try:
            from .numpy.helpers import column_chunks as numpy_column_chunks

            if columnar:
                slicer = numpy_column_chunks
            else:
                raise ValueError(
                    'NumPy inserts is only allowed with columnar=True'
                )

        except ImportError:
            raise RuntimeError('Extras for NumPy must be installed')

    else:
        slicer = column_chunks if columnar else chunks

    for chunk in slicer(data, client_settings['insert_block_size']):
        block = block_cls(sample_block.columns_with_types, chunk,
                          types_check=types_check)
        self.connection.send_data(block)
        inserted_rows += block.num_rows

        # Starting from the specific revision there are profile events
        # sent by server in response to each inserted block
        self.receive_profile_events()

    # Empty block means end of data.
    self.connection.send_data(block_cls())
    # If enabled by revision profile events are also sent after empty block
    self.receive_profile_events()

    return inserted_rows
673
+
674
def receive_end_of_query(self):
    """
    Consume packets until END_OF_STREAM.

    PROGRESS and PROFILE_EVENTS packets are stored on ``last_query``, LOG
    packets go to ``log_block`` and TABLE_COLUMNS packets are ignored.

    :raises: the exception carried by an EXCEPTION packet.
    :raises errors.UnexpectedPacketFromServerError: for any other type.
    """
    while True:
        packet = self.connection.receive_packet()

        if packet.type == ServerPacketTypes.END_OF_STREAM:
            return

        if packet.type == ServerPacketTypes.PROGRESS:
            self.last_query.store_progress(packet.progress)
            continue

        if packet.type == ServerPacketTypes.EXCEPTION:
            raise packet.exception

        if packet.type == ServerPacketTypes.LOG:
            log_block(packet.block)
            continue

        if packet.type == ServerPacketTypes.TABLE_COLUMNS:
            continue

        if packet.type == ServerPacketTypes.PROFILE_EVENTS:
            self.last_query.store_profile(packet.profile_info)
            continue

        message = self.connection.unexpected_packet_message(
            'Exception, EndOfStream, Progress, TableColumns, '
            'ProfileEvents or Log', packet.type
        )
        raise errors.UnexpectedPacketFromServerError(message)
702
+
703
def receive_end_of_insert_query(self):
    """
    Consume packets after an INSERT until END_OF_STREAM.

    LOG packets go to ``log_block`` and PROGRESS packets are stored on
    ``last_query``.

    :raises: the exception carried by an EXCEPTION packet.
    :raises errors.UnexpectedPacketFromServerError: for any other type.
    """
    while True:
        packet = self.connection.receive_packet()

        if packet.type == ServerPacketTypes.END_OF_STREAM:
            return

        if packet.type == ServerPacketTypes.LOG:
            log_block(packet.block)
            continue

        if packet.type == ServerPacketTypes.PROGRESS:
            self.last_query.store_progress(packet.progress)
            continue

        if packet.type == ServerPacketTypes.EXCEPTION:
            raise packet.exception

        message = self.connection.unexpected_packet_message(
            'EndOfStream, Log, Progress or Exception', packet.type
        )
        raise errors.UnexpectedPacketFromServerError(message)
724
+
725
def receive_profile_events(self):
    """
    Wait for a PROFILE_EVENTS packet when the server revision sends them.

    Returns immediately if the revision in use is below
    ``DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS_IN_INSERT``. Otherwise
    PROGRESS packets are stored and LOG packets logged until the
    PROFILE_EVENTS packet is stored.

    :raises: the exception carried by an EXCEPTION packet.
    :raises errors.UnexpectedPacketFromServerError: for any other type.
    """
    revision = self.connection.server_info.used_revision
    threshold = (
        defines.DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS_IN_INSERT
    )
    if revision < threshold:
        return None

    while True:
        packet = self.connection.receive_packet()

        if packet.type == ServerPacketTypes.PROFILE_EVENTS:
            self.last_query.store_profile(packet.profile_info)
            return

        if packet.type == ServerPacketTypes.PROGRESS:
            self.last_query.store_progress(packet.progress)
            continue

        if packet.type == ServerPacketTypes.LOG:
            log_block(packet.block)
            continue

        if packet.type == ServerPacketTypes.EXCEPTION:
            raise packet.exception

        message = self.connection.unexpected_packet_message(
            'ProfileEvents, Progress, Log or Exception', packet.type
        )
        raise errors.UnexpectedPacketFromServerError(message)
754
+
755
+ def cancel(self, with_column_types=False):
756
+ # TODO: Add warning if already cancelled.
757
+ self.connection.send_cancel()
758
+ # Client must still read until END_OF_STREAM packet.
759
+ return self.receive_result(with_column_types=with_column_types)
760
+
761
+ def substitute_params(self, query, params, context):
762
+ """
763
+ Substitutes parameters into a provided query.
764
+
765
+ For example::
766
+
767
+ client = Client(...)
768
+
769
+ substituted_query = client.substitute_params(
770
+ query='SELECT 1234, %(foo)s',
771
+ params={'foo': 'bar'},
772
+ context=client.connection.context
773
+ )
774
+
775
+ # prints: SELECT 1234, 'bar'
776
+ print(substituted_query)
777
+ """
778
+ # In case of server side templating we don't substitute here.
779
+ if self.connection.context.client_settings['server_side_params']:
780
+ return query
781
+
782
+ if not isinstance(params, dict):
783
+ raise ValueError('Parameters are expected in dict form')
784
+
785
+ escaped = escape_params(params, context)
786
+ return query % escaped
787
+
788
@classmethod
def from_url(cls, url):
    """
    Return a client configured from the given URL.

    For example::

        clickhouse://[user:password]@localhost:9000/default
        clickhouses://[user:password]@localhost:9440/default

    The following URL schemes are supported:
        clickhouse:// creates a normal TCP socket connection
        clickhouses:// creates a SSL wrapped TCP socket connection

    Recognised querystring arguments are converted and passed as keyword
    arguments to the client constructor; ``use_numpy`` and every
    unrecognised argument are passed inside ``settings``. Only the first
    value of a repeated argument is used.
    """
    url = urlparse(url)

    settings = {}
    kwargs = {}

    host = url.hostname

    if url.port is not None:
        kwargs['port'] = url.port

    # Drop only the leading slash; the rest of the path is the database.
    path = url.path.replace('/', '', 1)
    if path:
        kwargs['database'] = path

    if url.username is not None:
        kwargs['user'] = unquote(url.username)

    if url.password is not None:
        kwargs['password'] = unquote(url.password)

    if url.scheme == 'clickhouses':
        kwargs['secure'] = True

    compression_algs = {'lz4', 'lz4hc', 'zstd'}
    timeouts = {
        'connect_timeout',
        'send_receive_timeout',
        'sync_request_timeout'
    }

    for name, value in parse_qs(url.query).items():
        if not value or not len(value):
            continue

        value = value[0]

        if name == 'compression':
            # Either a known algorithm name or a boolean-like flag.
            value = value.lower()
            if value in compression_algs:
                kwargs[name] = value
            else:
                kwargs[name] = asbool(value)

        elif name == 'secure':
            kwargs[name] = asbool(value)

        elif name == 'use_numpy':
            settings[name] = asbool(value)

        elif name == 'round_robin':
            kwargs[name] = asbool(value)

        elif name == 'client_name':
            kwargs[name] = value

        elif name in timeouts:
            kwargs[name] = float(value)

        elif name == 'compress_block_size':
            kwargs[name] = int(value)

        elif name == 'settings_is_important':
            kwargs[name] = asbool(value)

        elif name == 'tcp_keepalive':
            try:
                kwargs[name] = asbool(value)
            except ValueError:
                # Not a boolean: expect three comma-separated integers.
                parts = value.split(',')
                kwargs[name] = (
                    int(parts[0]), int(parts[1]), int(parts[2])
                )
        elif name == 'client_revision':
            kwargs[name] = int(value)

        # ssl
        elif name == 'verify':
            kwargs[name] = asbool(value)
        elif name == 'ssl_version':
            # The value is looked up as an attribute of the ``ssl`` module.
            kwargs[name] = getattr(ssl, value)
        elif name in ['ca_certs', 'ciphers', 'keyfile', 'certfile',
                      'server_hostname']:
            kwargs[name] = value
        elif name == 'alt_hosts':
            kwargs['alt_hosts'] = value
        else:
            # Anything unrecognised is passed through as a setting string.
            settings[name] = value

    if settings:
        kwargs['settings'] = settings

    return cls(host, **kwargs)