clickhouse-orm 3.0.1__py2.py3-none-any.whl
- clickhouse_orm/__init__.py +14 -0
- clickhouse_orm/database.py +457 -0
- clickhouse_orm/engines.py +346 -0
- clickhouse_orm/fields.py +665 -0
- clickhouse_orm/funcs.py +1841 -0
- clickhouse_orm/migrations.py +287 -0
- clickhouse_orm/models.py +617 -0
- clickhouse_orm/query.py +701 -0
- clickhouse_orm/system_models.py +170 -0
- clickhouse_orm/utils.py +176 -0
- clickhouse_orm-3.0.1.dist-info/METADATA +90 -0
- clickhouse_orm-3.0.1.dist-info/RECORD +14 -0
- clickhouse_orm-3.0.1.dist-info/WHEEL +5 -0
- clickhouse_orm-3.0.1.dist-info/licenses/LICENSE +27 -0
@@ -0,0 +1,346 @@
from __future__ import annotations

import logging

from .utils import comma_join, get_subclass_names

logger = logging.getLogger("clickhouse_orm")


class Engine:
    def create_table_sql(self, db):
        raise NotImplementedError()  # pragma: no cover


class TinyLog(Engine):
    def create_table_sql(self, db):
        return "TinyLog"


class Log(Engine):
    def create_table_sql(self, db):
        return "Log"


class Memory(Engine):
    def create_table_sql(self, db):
        return "Memory"


class MergeTree(Engine):
    def __init__(
        self,
        date_col=None,
        order_by=(),
        sampling_expr=None,
        index_granularity=8192,
        replica_table_path=None,
        replica_name=None,
        partition_key=None,
        primary_key=None,
    ):
        assert type(order_by) in (list, tuple), "order_by must be a list or tuple"
        assert date_col is None or isinstance(date_col, str), "date_col must be string if present"
        assert primary_key is None or type(primary_key) in (list, tuple), "primary_key must be a list or tuple"
        assert partition_key is None or type(partition_key) in (
            list,
            tuple,
        ), "partition_key must be tuple or list if present"
        assert (replica_table_path is None) == (replica_name is None), (
            "both replica_table_path and replica_name must be specified"
        )

        # date_col and partition_key belong to the old and new table engine
        # syntaxes respectively, so make sure at least one of them is given.
        assert date_col or partition_key, "You must set either date_col or partition_key"
        self.date_col = date_col
        self.partition_key = partition_key if partition_key else ("toYYYYMM(`%s`)" % date_col,)
        self.primary_key = primary_key

        self.order_by = order_by
        self.sampling_expr = sampling_expr
        self.index_granularity = index_granularity
        self.replica_table_path = replica_table_path
        self.replica_name = replica_name

    # The field was renamed to `order_by` to match the new syntax;
    # `key_cols` is kept as a deprecated alias.
    @property
    def key_cols(self):
        logger.warning(
            "`key_cols` attribute is deprecated and may be removed in future. Use `order_by` attribute instead"
        )
        return self.order_by

    @key_cols.setter
    def key_cols(self, value):
        logger.warning(
            "`key_cols` attribute is deprecated and may be removed in future. Use `order_by` attribute instead"
        )
        self.order_by = value

    def create_table_sql(self, db):
        name = self.__class__.__name__
        if self.replica_name:
            name = "Replicated" + name

        # In ClickHouse 1.1.54310 custom partitioning key was introduced
        # https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/
        # Let's check version and use new syntax if available
        if db.server_version >= (1, 1, 54310):
            partition_sql = "PARTITION BY (%s) ORDER BY (%s)" % (
                comma_join(map(str, self.partition_key)),
                comma_join(map(str, self.order_by)),
            )

            if self.primary_key:
                partition_sql += " PRIMARY KEY (%s)" % comma_join(map(str, self.primary_key))

            if self.sampling_expr:
                partition_sql += " SAMPLE BY %s" % self.sampling_expr

            partition_sql += " SETTINGS index_granularity=%d" % self.index_granularity

        elif not self.date_col:
            # Can't import it globally due to circular import
            from clickhouse_orm.database import DatabaseException

            raise DatabaseException(
                "Custom partitioning is not supported before ClickHouse 1.1.54310. "
                "Please update your server or use date_col syntax. "
                "https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/"
            )
        else:
            partition_sql = ""

        params = self._build_sql_params(db)
        return "%s(%s) %s" % (name, comma_join(params), partition_sql)

    def _build_sql_params(self, db):
        params = []
        if self.replica_name:
            params += ["'%s'" % self.replica_table_path, "'%s'" % self.replica_name]

        # In ClickHouse 1.1.54310 custom partitioning key was introduced
        # https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/
        # Those parameters are processed in create_table_sql directly;
        # in previous ClickHouse versions this syntax does not work.
        if db.server_version < (1, 1, 54310):
            params.append(self.date_col)
            if self.sampling_expr:
                params.append(self.sampling_expr)
            params.append("(%s)" % comma_join(map(str, self.order_by)))
            params.append(str(self.index_granularity))

        return params


class CollapsingMergeTree(MergeTree):
    def __init__(
        self,
        date_col=None,
        order_by=(),
        sign_col="sign",
        sampling_expr=None,
        index_granularity=8192,
        replica_table_path=None,
        replica_name=None,
        partition_key=None,
        primary_key=None,
    ):
        super().__init__(
            date_col,
            order_by,
            sampling_expr,
            index_granularity,
            replica_table_path,
            replica_name,
            partition_key,
            primary_key,
        )
        self.sign_col = sign_col

    def _build_sql_params(self, db):
        params = super()._build_sql_params(db)
        params.append(self.sign_col)
        return params


class SummingMergeTree(MergeTree):
    def __init__(
        self,
        date_col=None,
        order_by=(),
        summing_cols=None,
        sampling_expr=None,
        index_granularity=8192,
        replica_table_path=None,
        replica_name=None,
        partition_key=None,
        primary_key=None,
    ):
        super().__init__(
            date_col,
            order_by,
            sampling_expr,
            index_granularity,
            replica_table_path,
            replica_name,
            partition_key,
            primary_key,
        )
        assert summing_cols is None or type(summing_cols) in (list, tuple), "summing_cols must be a list or tuple"
        self.summing_cols = summing_cols

    def _build_sql_params(self, db):
        params = super()._build_sql_params(db)
        if self.summing_cols:
            params.append("(%s)" % comma_join(self.summing_cols))
        return params


class ReplacingMergeTree(MergeTree):
    def __init__(
        self,
        date_col=None,
        order_by=(),
        ver_col=None,
        sampling_expr=None,
        index_granularity=8192,
        replica_table_path=None,
        replica_name=None,
        partition_key=None,
        primary_key=None,
    ):
        super().__init__(
            date_col,
            order_by,
            sampling_expr,
            index_granularity,
            replica_table_path,
            replica_name,
            partition_key,
            primary_key,
        )
        self.ver_col = ver_col

    def _build_sql_params(self, db):
        params = super()._build_sql_params(db)
        if self.ver_col:
            params.append(self.ver_col)
        return params


class Buffer(Engine):
    """
    Buffers the data to write in RAM, periodically flushing it to another table.
    Must be used in conjunction with a `BufferModel`.
    Read more [here](https://clickhouse.tech/docs/en/engines/table-engines/special/buffer/).
    """

    # Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
    def __init__(
        self,
        main_model,
        num_layers=16,
        min_time=10,
        max_time=100,
        min_rows=10000,
        max_rows=1000000,
        min_bytes=10000000,
        max_bytes=100000000,
    ):
        self.main_model = main_model
        self.num_layers = num_layers
        self.min_time = min_time
        self.max_time = max_time
        self.min_rows = min_rows
        self.max_rows = max_rows
        self.min_bytes = min_bytes
        self.max_bytes = max_bytes

    def create_table_sql(self, db):
        # Overridden create_table_sql example:
        # sql = 'ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000)'
        sql = "ENGINE = Buffer(`%s`, `%s`, %d, %d, %d, %d, %d, %d, %d)" % (
            db.db_name,
            self.main_model.table_name(),
            self.num_layers,
            self.min_time,
            self.max_time,
            self.min_rows,
            self.max_rows,
            self.min_bytes,
            self.max_bytes,
        )
        return sql


class Merge(Engine):
    """
    The Merge engine (not to be confused with MergeTree) does not store data itself,
    but allows reading from any number of other tables simultaneously.
    Writing to a table is not supported.
    https://clickhouse.tech/docs/en/engines/table-engines/special/merge/
    """

    def __init__(self, table_regex):
        assert isinstance(table_regex, str), "'table_regex' parameter must be string"
        self.table_regex = table_regex

    def create_table_sql(self, db):
        return "Merge(`%s`, '%s')" % (db.db_name, self.table_regex)


class Distributed(Engine):
    """
    The Distributed engine by itself does not store data,
    but allows distributed query processing on multiple servers.
    Reading is automatically parallelized.
    During a read, the table indexes on remote servers are used, if there are any.

    See full documentation here:
    https://clickhouse.tech/docs/en/engines/table-engines/special/distributed/
    """

    def __init__(self, cluster, table=None, sharding_key=None):
        """
        - `cluster`: what cluster to access data from
        - `table`: underlying table that actually stores data.
          If you do not specify a table here, ensure that it can be inferred
          from your model's superclass (see models.DistributedModel.fix_engine_table)
        - `sharding_key`: how to distribute data among shards when inserting
          directly into the Distributed table, optional
        """
        self.cluster = cluster
        self.table = table
        self.sharding_key = sharding_key

    @property
    def table_name(self):
        # TODO: circular import is bad
        from .models import ModelBase

        table = self.table

        if isinstance(table, ModelBase):
            return table.table_name()

        return table

    def create_table_sql(self, db):
        name = self.__class__.__name__
        params = self._build_sql_params(db)
        return "%s(%s)" % (name, ", ".join(params))

    def _build_sql_params(self, db):
        if self.table_name is None:
            raise ValueError(f"Cannot create {self.__class__.__name__} engine: specify an underlying table")

        params = ["`%s`" % p for p in [self.cluster, db.db_name, self.table_name]]
        if self.sharding_key:
            params.append(self.sharding_key)
        return params


# Expose only relevant classes in import *
__all__ = get_subclass_names(locals(), Engine)