clickhouse-orm 3.0.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,346 @@
+ from __future__ import annotations
+
+ import logging
+
+ from .utils import comma_join, get_subclass_names
+
+ logger = logging.getLogger("clickhouse_orm")
+
+
+ class Engine:
+     def create_table_sql(self, db):
+         raise NotImplementedError()  # pragma: no cover
+
+
+ class TinyLog(Engine):
+     def create_table_sql(self, db):
+         return "TinyLog"
+
+
+ class Log(Engine):
+     def create_table_sql(self, db):
+         return "Log"
+
+
+ class Memory(Engine):
+     def create_table_sql(self, db):
+         return "Memory"
+
+
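+ # Sketch of the engine interface (illustrative; `db` stands for any connected
+ # Database instance, which passes itself to the engine when building DDL):
+ #
+ #     TinyLog().create_table_sql(db)   # -> "TinyLog"
+ #     Memory().create_table_sql(db)    # -> "Memory"
+
+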
+ class MergeTree(Engine):
+     def __init__(
+         self,
+         date_col=None,
+         order_by=(),
+         sampling_expr=None,
+         index_granularity=8192,
+         replica_table_path=None,
+         replica_name=None,
+         partition_key=None,
+         primary_key=None,
+     ):
+         assert type(order_by) in (list, tuple), "order_by must be a list or tuple"
+         assert date_col is None or isinstance(date_col, str), "date_col must be a string if present"
+         assert primary_key is None or type(primary_key) in (list, tuple), "primary_key must be a list or tuple"
+         assert partition_key is None or type(partition_key) in (
+             list,
+             tuple,
+         ), "partition_key must be a tuple or list if present"
+         assert (replica_table_path is None) == (replica_name is None), (
+             "replica_table_path and replica_name must be specified together, or not at all"
+         )
+
+         # date_col belongs to the old table-engine syntax and partition_key to
+         # the new one, so at least one of them must be given.
+         assert date_col or partition_key, "You must set either date_col or partition_key"
+         self.date_col = date_col
+         self.partition_key = partition_key if partition_key else ("toYYYYMM(`%s`)" % date_col,)
+         self.primary_key = primary_key
+
+         self.order_by = order_by
+         self.sampling_expr = sampling_expr
+         self.index_granularity = index_granularity
+         self.replica_table_path = replica_table_path
+         self.replica_name = replica_name
+
+     # Renamed for the new engine syntax; `key_cols` remains as a deprecated alias of `order_by`.
+     @property
+     def key_cols(self):
+         logger.warning(
+             "The `key_cols` attribute is deprecated and may be removed in the future. Use `order_by` instead"
+         )
+         return self.order_by
+
+     @key_cols.setter
+     def key_cols(self, value):
+         logger.warning(
+             "The `key_cols` attribute is deprecated and may be removed in the future. Use `order_by` instead"
+         )
+         self.order_by = value
+
+     def create_table_sql(self, db):
+         name = self.__class__.__name__
+         if self.replica_name:
+             name = "Replicated" + name
+
+         # In ClickHouse 1.1.54310 a custom partitioning key was introduced
+         # https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/
+         # Check the server version and use the new syntax if available
+         if db.server_version >= (1, 1, 54310):
+             partition_sql = "PARTITION BY (%s) ORDER BY (%s)" % (
+                 comma_join(map(str, self.partition_key)),
+                 comma_join(map(str, self.order_by)),
+             )
+
+             if self.primary_key:
+                 partition_sql += " PRIMARY KEY (%s)" % comma_join(map(str, self.primary_key))
+
+             if self.sampling_expr:
+                 partition_sql += " SAMPLE BY %s" % self.sampling_expr
+
+             partition_sql += " SETTINGS index_granularity=%d" % self.index_granularity
+
+         elif not self.date_col:
+             # Can't import it globally due to circular import
+             from clickhouse_orm.database import DatabaseException
+
+             raise DatabaseException(
+                 "Custom partitioning is not supported before ClickHouse 1.1.54310. "
+                 "Please update your server or use the date_col syntax. "
+                 "https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/"
+             )
+         else:
+             partition_sql = ""
+
+         params = self._build_sql_params(db)
+         return "%s(%s) %s" % (name, comma_join(params), partition_sql)
+
+     def _build_sql_params(self, db):
+         params = []
+         if self.replica_name:
+             params += ["'%s'" % self.replica_table_path, "'%s'" % self.replica_name]
+
+         # In ClickHouse 1.1.54310 a custom partitioning key was introduced
+         # https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/
+         # Those parameters are processed in create_table_sql directly;
+         # in older ClickHouse versions the new syntax does not work.
+         if db.server_version < (1, 1, 54310):
+             params.append(self.date_col)
+             if self.sampling_expr:
+                 params.append(self.sampling_expr)
+             params.append("(%s)" % comma_join(map(str, self.order_by)))
+             params.append(str(self.index_granularity))
+
+         return params
+
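+ # Usage sketch (illustrative; assumes `db` is a connected Database whose
+ # server supports custom partitioning keys):
+ #
+ #     engine = MergeTree(partition_key=("toYYYYMM(date)",), order_by=("date", "city"))
+ #     engine.create_table_sql(db)
+ #     # -> "MergeTree() PARTITION BY (toYYYYMM(date)) ORDER BY (date, city)
+ #     #     SETTINGS index_granularity=8192"
+ #
+ # On servers older than 1.1.54310 the same engine must be declared with the
+ # old syntax instead, e.g. MergeTree(date_col="date", order_by=("date", "city")).
+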
+
+ class CollapsingMergeTree(MergeTree):
+     def __init__(
+         self,
+         date_col=None,
+         order_by=(),
+         sign_col="sign",
+         sampling_expr=None,
+         index_granularity=8192,
+         replica_table_path=None,
+         replica_name=None,
+         partition_key=None,
+         primary_key=None,
+     ):
+         super().__init__(
+             date_col,
+             order_by,
+             sampling_expr,
+             index_granularity,
+             replica_table_path,
+             replica_name,
+             partition_key,
+             primary_key,
+         )
+         self.sign_col = sign_col
+
+     def _build_sql_params(self, db):
+         params = super()._build_sql_params(db)
+         params.append(self.sign_col)
+         return params
+
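+ # Sketch (illustrative): the sign column is appended after the inherited
+ # parameters, e.g.
+ #
+ #     engine = CollapsingMergeTree(partition_key=("toYYYYMM(date)",), order_by=("date",), sign_col="sign")
+ #     engine.create_table_sql(db)
+ #     # -> "CollapsingMergeTree(sign) PARTITION BY (toYYYYMM(date)) ORDER BY (date)
+ #     #     SETTINGS index_granularity=8192"
+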
+
+ class SummingMergeTree(MergeTree):
+     def __init__(
+         self,
+         date_col=None,
+         order_by=(),
+         summing_cols=None,
+         sampling_expr=None,
+         index_granularity=8192,
+         replica_table_path=None,
+         replica_name=None,
+         partition_key=None,
+         primary_key=None,
+     ):
+         super().__init__(
+             date_col,
+             order_by,
+             sampling_expr,
+             index_granularity,
+             replica_table_path,
+             replica_name,
+             partition_key,
+             primary_key,
+         )
+         assert summing_cols is None or type(summing_cols) in (list, tuple), "summing_cols must be a list or tuple"
+         self.summing_cols = summing_cols
+
+     def _build_sql_params(self, db):
+         params = super()._build_sql_params(db)
+         if self.summing_cols:
+             params.append("(%s)" % comma_join(self.summing_cols))
+         return params
+
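+ # Sketch (illustrative): summing columns are emitted as a parenthesized list,
+ # e.g. SummingMergeTree(partition_key=("toYYYYMM(date)",), order_by=("date",),
+ # summing_cols=("clicks", "cost")) yields "SummingMergeTree((clicks, cost)) ..."
+ # followed by the PARTITION BY / ORDER BY clauses shown above.
+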
+
+ class ReplacingMergeTree(MergeTree):
+     def __init__(
+         self,
+         date_col=None,
+         order_by=(),
+         ver_col=None,
+         sampling_expr=None,
+         index_granularity=8192,
+         replica_table_path=None,
+         replica_name=None,
+         partition_key=None,
+         primary_key=None,
+     ):
+         super().__init__(
+             date_col,
+             order_by,
+             sampling_expr,
+             index_granularity,
+             replica_table_path,
+             replica_name,
+             partition_key,
+             primary_key,
+         )
+         self.ver_col = ver_col
+
+     def _build_sql_params(self, db):
+         params = super()._build_sql_params(db)
+         if self.ver_col:
+             params.append(self.ver_col)
+         return params
+
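+ # Sketch (illustrative): the optional version column is passed as the engine's
+ # only parameter, e.g. ReplacingMergeTree(partition_key=("toYYYYMM(date)",),
+ # order_by=("uid",), ver_col="updated_at") yields "ReplacingMergeTree(updated_at) ...".
+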
+
+ class Buffer(Engine):
+     """
+     Buffers the data to write in RAM, periodically flushing it to another table.
+     Must be used in conjunction with a `BufferModel`.
+     Read more [here](https://clickhouse.tech/docs/en/engines/table-engines/special/buffer/).
+     """
+
+     # Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
+     def __init__(
+         self,
+         main_model,
+         num_layers=16,
+         min_time=10,
+         max_time=100,
+         min_rows=10000,
+         max_rows=1000000,
+         min_bytes=10000000,
+         max_bytes=100000000,
+     ):
+         self.main_model = main_model
+         self.num_layers = num_layers
+         self.min_time = min_time
+         self.max_time = max_time
+         self.min_rows = min_rows
+         self.max_rows = max_rows
+         self.min_bytes = min_bytes
+         self.max_bytes = max_bytes
+
+     def create_table_sql(self, db):
+         # Overridden create_table_sql example:
+         # sql = 'ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000)'
+         sql = "ENGINE = Buffer(`%s`, `%s`, %d, %d, %d, %d, %d, %d, %d)" % (
+             db.db_name,
+             self.main_model.table_name(),
+             self.num_layers,
+             self.min_time,
+             self.max_time,
+             self.min_rows,
+             self.max_rows,
+             self.min_bytes,
+             self.max_bytes,
+         )
+         return sql
+
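+ # Usage sketch (illustrative; `HitsModel` is a hypothetical model class whose
+ # table the buffer flushes into):
+ #
+ #     class HitsBuffer(BufferModel, HitsModel):
+ #         engine = Buffer(HitsModel)
+ #
+ #     # create_table_sql(db) then renders something like:
+ #     # "ENGINE = Buffer(`default`, `hits`, 16, 10, 100, 10000, 1000000, 10000000, 100000000)"
+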
+
+ class Merge(Engine):
+     """
+     The Merge engine (not to be confused with MergeTree) does not store data itself,
+     but allows reading from any number of other tables simultaneously.
+     Writing to a table is not supported.
+     https://clickhouse.tech/docs/en/engines/table-engines/special/merge/
+     """
+
+     def __init__(self, table_regex):
+         assert isinstance(table_regex, str), "'table_regex' parameter must be a string"
+         self.table_regex = table_regex
+
+     def create_table_sql(self, db):
+         return "Merge(`%s`, '%s')" % (db.db_name, self.table_regex)
+
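+ # Usage sketch (illustrative): a table regex selects the underlying tables, e.g.
+ #
+ #     Merge("^access_log_").create_table_sql(db)
+ #     # -> "Merge(`default`, '^access_log_')"  (assuming db.db_name == "default")
+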
+
+ class Distributed(Engine):
+     """
+     The Distributed engine by itself does not store data,
+     but allows distributed query processing on multiple servers.
+     Reading is automatically parallelized.
+     During a read, the table indexes on remote servers are used, if there are any.
+
+     See the full documentation here:
+     https://clickhouse.tech/docs/en/engines/table-engines/special/distributed/
+     """
+
+     def __init__(self, cluster, table=None, sharding_key=None):
+         """
+         - `cluster`: what cluster to access data from
+         - `table`: underlying table that actually stores data.
+           If you do not specify a table here, ensure that it can be inferred
+           from your model's superclass (see models.DistributedModel.fix_engine_table)
+         - `sharding_key`: how to distribute data among shards when inserting
+           directly into the Distributed table, optional
+         """
+         self.cluster = cluster
+         self.table = table
+         self.sharding_key = sharding_key
+
+     @property
+     def table_name(self):
+         # TODO: circular import is bad
+         from .models import ModelBase
+
+         table = self.table
+
+         if isinstance(table, ModelBase):
+             return table.table_name()
+
+         return table
+
+     def create_table_sql(self, db):
+         name = self.__class__.__name__
+         params = self._build_sql_params(db)
+         return "%s(%s)" % (name, ", ".join(params))
+
+     def _build_sql_params(self, db):
+         if self.table_name is None:
+             raise ValueError(f"Cannot create {self.__class__.__name__} engine: specify an underlying table")
+
+         params = ["`%s`" % p for p in [self.cluster, db.db_name, self.table_name]]
+         if self.sharding_key:
+             params.append(self.sharding_key)
+         return params
+
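+ # Usage sketch (illustrative; assumes a cluster named "my_cluster" and an
+ # underlying table "hits" in database "default"):
+ #
+ #     Distributed("my_cluster", table="hits", sharding_key="rand()").create_table_sql(db)
+ #     # -> "Distributed(`my_cluster`, `default`, `hits`, rand())"
+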
+
+ # Expose only relevant classes in import *
+ __all__ = get_subclass_names(locals(), Engine)