clickhouse-orm 2.2.2__tar.gz → 3.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clickhouse_orm-3.1.0/PKG-INFO +91 -0
- clickhouse_orm-3.1.0/README.md +63 -0
- {clickhouse_orm-2.2.2 → clickhouse_orm-3.1.0}/clickhouse_orm/__init__.py +2 -0
- {clickhouse_orm-2.2.2 → clickhouse_orm-3.1.0}/clickhouse_orm/database.py +57 -15
- {clickhouse_orm-2.2.2 → clickhouse_orm-3.1.0}/clickhouse_orm/engines.py +13 -11
- {clickhouse_orm-2.2.2 → clickhouse_orm-3.1.0}/clickhouse_orm/fields.py +46 -70
- {clickhouse_orm-2.2.2 → clickhouse_orm-3.1.0}/clickhouse_orm/funcs.py +18 -9
- {clickhouse_orm-2.2.2 → clickhouse_orm-3.1.0}/clickhouse_orm/migrations.py +13 -9
- {clickhouse_orm-2.2.2 → clickhouse_orm-3.1.0}/clickhouse_orm/models.py +25 -21
- {clickhouse_orm-2.2.2 → clickhouse_orm-3.1.0}/clickhouse_orm/query.py +10 -9
- {clickhouse_orm-2.2.2 → clickhouse_orm-3.1.0}/clickhouse_orm/system_models.py +3 -0
- {clickhouse_orm-2.2.2 → clickhouse_orm-3.1.0}/clickhouse_orm/utils.py +22 -10
- clickhouse_orm-3.1.0/pyproject.toml +95 -0
- clickhouse_orm-2.2.2/PKG-INFO +0 -26
- clickhouse_orm-2.2.2/pyproject.toml +0 -52
- clickhouse_orm-2.2.2/setup.py +0 -30
- {clickhouse_orm-2.2.2 → clickhouse_orm-3.1.0}/LICENSE +0 -0
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: clickhouse_orm
|
|
3
|
+
Version: 3.1.0
|
|
4
|
+
Summary: A simple ORM for working with the Clickhouse database. Maintenance fork of infi.clickhouse_orm.
|
|
5
|
+
Author-email: Oliver Margetts <oliver.margetts@gmail.com>
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Classifier: Intended Audience :: Developers
|
|
8
|
+
Classifier: Intended Audience :: System Administrators
|
|
9
|
+
Classifier: License :: OSI Approved :: BSD License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Programming Language :: Python
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
17
|
+
Classifier: Topic :: Database
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: requests
|
|
20
|
+
Requires-Dist: pytz
|
|
21
|
+
Requires-Dist: docker==7.1.0 ; extra == "dev"
|
|
22
|
+
Requires-Dist: pytest==9.0.2 ; extra == "dev"
|
|
23
|
+
Requires-Dist: ruff==0.14.14 ; extra == "dev"
|
|
24
|
+
Project-URL: Homepage, https://github.com/SuadeLabs/clickhouse_orm
|
|
25
|
+
Project-URL: Repository, https://github.com/SuadeLabs/clickhouse_orm
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
|
|
28
|
+
A fork of [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm) aimed at more frequent maintenance and bugfixes.
|
|
29
|
+
|
|
30
|
+
[Test workflow status](https://github.com/SuadeLabs/clickhouse_orm/actions/workflows/python-test.yml)
|
|
31
|
+

|
|
32
|
+
|
|
33
|
+
Introduction
|
|
34
|
+
============
|
|
35
|
+
|
|
36
|
+
This project is a simple ORM for working with the [ClickHouse database](https://clickhouse.yandex/).
|
|
37
|
+
It allows you to define model classes whose instances can be written to the database and read from it.
|
|
38
|
+
|
|
39
|
+
Let's jump right in with a simple example of monitoring CPU usage. First we need to define the model class,
|
|
40
|
+
connect to the database and create a table for the model:
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
from clickhouse_orm import Database, Model, DateTimeField, UInt16Field, Float32Field, Memory, F
|
|
44
|
+
|
|
45
|
+
class CPUStats(Model):
|
|
46
|
+
|
|
47
|
+
timestamp = DateTimeField()
|
|
48
|
+
cpu_id = UInt16Field()
|
|
49
|
+
cpu_percent = Float32Field()
|
|
50
|
+
|
|
51
|
+
engine = Memory()
|
|
52
|
+
|
|
53
|
+
db = Database('demo')
|
|
54
|
+
db.create_table(CPUStats)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Now we can collect usage statistics per CPU, and write them to the database:
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
import psutil, time, datetime
|
|
61
|
+
|
|
62
|
+
psutil.cpu_percent(percpu=True) # first sample should be discarded
|
|
63
|
+
with db.session(): # use a requests session for efficiency
|
|
64
|
+
while True:
|
|
65
|
+
time.sleep(1)
|
|
66
|
+
stats = psutil.cpu_percent(percpu=True)
|
|
67
|
+
timestamp = datetime.datetime.now()
|
|
68
|
+
db.insert([
|
|
69
|
+
CPUStats(timestamp=timestamp, cpu_id=cpu_id, cpu_percent=cpu_percent)
|
|
70
|
+
for cpu_id, cpu_percent in enumerate(stats)
|
|
71
|
+
])
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Querying the table is easy, using either the query builder or raw SQL:
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
# Calculate what percentage of the time CPU 1 was over 95% busy
|
|
78
|
+
queryset = CPUStats.objects_in(db)
|
|
79
|
+
total = queryset.filter(CPUStats.cpu_id == 1).count()
|
|
80
|
+
busy = queryset.filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count()
|
|
81
|
+
print('CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total))
|
|
82
|
+
|
|
83
|
+
# Calculate the average usage per CPU
|
|
84
|
+
for row in queryset.aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)):
|
|
85
|
+
print('CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row))
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
This and other examples can be found in the `examples` folder.
|
|
89
|
+
|
|
90
|
+
To learn more please visit the [documentation](docs/toc.md).
|
|
91
|
+
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
A fork of [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm) aimed at more frequent maintenance and bugfixes.
|
|
2
|
+
|
|
3
|
+
[Test workflow status](https://github.com/SuadeLabs/clickhouse_orm/actions/workflows/python-test.yml)
|
|
4
|
+

|
|
5
|
+
|
|
6
|
+
Introduction
|
|
7
|
+
============
|
|
8
|
+
|
|
9
|
+
This project is a simple ORM for working with the [ClickHouse database](https://clickhouse.yandex/).
|
|
10
|
+
It allows you to define model classes whose instances can be written to the database and read from it.
|
|
11
|
+
|
|
12
|
+
Let's jump right in with a simple example of monitoring CPU usage. First we need to define the model class,
|
|
13
|
+
connect to the database and create a table for the model:
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from clickhouse_orm import Database, Model, DateTimeField, UInt16Field, Float32Field, Memory, F
|
|
17
|
+
|
|
18
|
+
class CPUStats(Model):
|
|
19
|
+
|
|
20
|
+
timestamp = DateTimeField()
|
|
21
|
+
cpu_id = UInt16Field()
|
|
22
|
+
cpu_percent = Float32Field()
|
|
23
|
+
|
|
24
|
+
engine = Memory()
|
|
25
|
+
|
|
26
|
+
db = Database('demo')
|
|
27
|
+
db.create_table(CPUStats)
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Now we can collect usage statistics per CPU, and write them to the database:
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import psutil, time, datetime
|
|
34
|
+
|
|
35
|
+
psutil.cpu_percent(percpu=True) # first sample should be discarded
|
|
36
|
+
with db.session(): # use a requests session for efficiency
|
|
37
|
+
while True:
|
|
38
|
+
time.sleep(1)
|
|
39
|
+
stats = psutil.cpu_percent(percpu=True)
|
|
40
|
+
timestamp = datetime.datetime.now()
|
|
41
|
+
db.insert([
|
|
42
|
+
CPUStats(timestamp=timestamp, cpu_id=cpu_id, cpu_percent=cpu_percent)
|
|
43
|
+
for cpu_id, cpu_percent in enumerate(stats)
|
|
44
|
+
])
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Querying the table is easy, using either the query builder or raw SQL:
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
# Calculate what percentage of the time CPU 1 was over 95% busy
|
|
51
|
+
queryset = CPUStats.objects_in(db)
|
|
52
|
+
total = queryset.filter(CPUStats.cpu_id == 1).count()
|
|
53
|
+
busy = queryset.filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count()
|
|
54
|
+
print('CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total))
|
|
55
|
+
|
|
56
|
+
# Calculate the average usage per CPU
|
|
57
|
+
for row in queryset.aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)):
|
|
58
|
+
print('CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row))
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
This and other examples can be found in the `examples` folder.
|
|
62
|
+
|
|
63
|
+
To learn more please visit the [documentation](docs/toc.md).
|
|
@@ -1,6 +1,9 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import datetime
|
|
2
4
|
import logging
|
|
3
5
|
import re
|
|
6
|
+
from contextlib import contextmanager
|
|
4
7
|
from math import ceil
|
|
5
8
|
from string import Template
|
|
6
9
|
|
|
@@ -13,13 +16,11 @@ from .utils import Page, import_submodules, parse_tsv
|
|
|
13
16
|
logger = logging.getLogger("clickhouse_orm")
|
|
14
17
|
|
|
15
18
|
|
|
16
|
-
class DatabaseException(Exception):
|
|
19
|
+
class DatabaseException(Exception): # noqa: N818
|
|
17
20
|
"""
|
|
18
21
|
Raised when a database operation fails.
|
|
19
22
|
"""
|
|
20
23
|
|
|
21
|
-
pass
|
|
22
|
-
|
|
23
24
|
|
|
24
25
|
class ServerError(DatabaseException):
|
|
25
26
|
"""
|
|
@@ -35,7 +36,7 @@ class ServerError(DatabaseException):
|
|
|
35
36
|
# just skip custom init
|
|
36
37
|
# if non-standard message format
|
|
37
38
|
self.message = message
|
|
38
|
-
super(
|
|
39
|
+
super().__init__(message)
|
|
39
40
|
|
|
40
41
|
ERROR_PATTERNS = (
|
|
41
42
|
# ClickHouse prior to v19.3.3
|
|
@@ -55,6 +56,14 @@ class ServerError(DatabaseException):
|
|
|
55
56
|
""",
|
|
56
57
|
re.VERBOSE | re.DOTALL,
|
|
57
58
|
),
|
|
59
|
+
# ClickHouse v21+
|
|
60
|
+
re.compile(
|
|
61
|
+
r"""
|
|
62
|
+
Code:\ (?P<code>\d+).
|
|
63
|
+
\ (?P<type1>[^ \n]+):\ (?P<msg>.+)
|
|
64
|
+
""",
|
|
65
|
+
re.VERBOSE | re.DOTALL,
|
|
66
|
+
),
|
|
58
67
|
)
|
|
59
68
|
|
|
60
69
|
@classmethod
|
|
@@ -75,19 +84,21 @@ class ServerError(DatabaseException):
|
|
|
75
84
|
|
|
76
85
|
def __str__(self):
|
|
77
86
|
if self.code is not None:
|
|
78
|
-
return "{} ({
|
|
87
|
+
return f"{self.message} ({self.code})"
|
|
79
88
|
|
|
80
89
|
|
|
81
|
-
class Database
|
|
90
|
+
class Database:
|
|
82
91
|
"""
|
|
83
92
|
Database instances connect to a specific ClickHouse database for running queries,
|
|
84
93
|
inserting data and other operations.
|
|
85
94
|
"""
|
|
86
95
|
|
|
96
|
+
_default_url = "http://localhost:8123/"
|
|
97
|
+
|
|
87
98
|
def __init__(
|
|
88
99
|
self,
|
|
89
100
|
db_name,
|
|
90
|
-
db_url=
|
|
101
|
+
db_url=None,
|
|
91
102
|
username=None,
|
|
92
103
|
password=None,
|
|
93
104
|
readonly=False,
|
|
@@ -95,6 +106,7 @@ class Database(object):
|
|
|
95
106
|
timeout=60,
|
|
96
107
|
verify_ssl_cert=True,
|
|
97
108
|
log_statements=False,
|
|
109
|
+
session=None,
|
|
98
110
|
):
|
|
99
111
|
"""
|
|
100
112
|
Initializes a database instance. Unless it's readonly, the database will be
|
|
@@ -111,13 +123,14 @@ class Database(object):
|
|
|
111
123
|
- `log_statements`: when True, all database statements are logged.
|
|
112
124
|
"""
|
|
113
125
|
self.db_name = db_name
|
|
114
|
-
self.db_url = db_url
|
|
115
|
-
self.readonly = False
|
|
126
|
+
self.db_url = db_url or self._default_url
|
|
127
|
+
self.readonly = self.connection_readonly = False
|
|
116
128
|
self.timeout = timeout
|
|
117
|
-
self.
|
|
118
|
-
self.request_session
|
|
119
|
-
|
|
120
|
-
|
|
129
|
+
self.verify_ssl_cert = verify_ssl_cert
|
|
130
|
+
self.request_session = None
|
|
131
|
+
self.__username = username
|
|
132
|
+
self.__password = password
|
|
133
|
+
|
|
121
134
|
self.log_statements = log_statements
|
|
122
135
|
self.settings = {}
|
|
123
136
|
self.db_exists = False # this is required before running _is_existing_database
|
|
@@ -137,6 +150,22 @@ class Database(object):
|
|
|
137
150
|
# Version 19.0 and above support LowCardinality
|
|
138
151
|
self.has_low_cardinality_support = self.server_version >= (19, 0)
|
|
139
152
|
|
|
153
|
+
@contextmanager
|
|
154
|
+
def session(self):
|
|
155
|
+
"""Contextmanager to use a persistent session for requests.
|
|
156
|
+
|
|
157
|
+
This can be quicker if making lots of small queries.
|
|
158
|
+
"""
|
|
159
|
+
with requests.Session() as session:
|
|
160
|
+
session.verify = self.verify_ssl_cert
|
|
161
|
+
if self.__username:
|
|
162
|
+
session.auth = (self.__username, self.__password or "")
|
|
163
|
+
self.request_session = session
|
|
164
|
+
try:
|
|
165
|
+
yield self
|
|
166
|
+
finally:
|
|
167
|
+
self.request_session = None
|
|
168
|
+
|
|
140
169
|
def create_database(self):
|
|
141
170
|
"""
|
|
142
171
|
Creates the database on the ClickHouse server if it does not already exist.
|
|
@@ -388,7 +417,20 @@ class Database(object):
|
|
|
388
417
|
if self.log_statements:
|
|
389
418
|
logger.info(data)
|
|
390
419
|
params = self._build_params(settings)
|
|
391
|
-
|
|
420
|
+
|
|
421
|
+
if self.request_session:
|
|
422
|
+
r = self.request_session.post(self.db_url, params=params, data=data, stream=stream, timeout=self.timeout)
|
|
423
|
+
else:
|
|
424
|
+
r = requests.post(
|
|
425
|
+
self.db_url,
|
|
426
|
+
params=params,
|
|
427
|
+
data=data,
|
|
428
|
+
stream=stream,
|
|
429
|
+
timeout=self.timeout,
|
|
430
|
+
verify=self.verify_ssl_cert,
|
|
431
|
+
auth=(self.__username, self.__password or "") if self.__username else None,
|
|
432
|
+
)
|
|
433
|
+
|
|
392
434
|
if r.status_code != 200:
|
|
393
435
|
raise ServerError(r.text)
|
|
394
436
|
return r
|
|
@@ -432,7 +474,7 @@ class Database(object):
|
|
|
432
474
|
except ServerError as e:
|
|
433
475
|
logger.exception("Cannot determine server version (%s), assuming 1.1.0", e)
|
|
434
476
|
ver = "1.1.0"
|
|
435
|
-
return tuple(int(n) for n in ver.split(".")) if as_tuple else ver
|
|
477
|
+
return tuple(int(n) for n in ver.split(".") if n.isdigit()) if as_tuple else ver
|
|
436
478
|
|
|
437
479
|
def _is_existing_database(self):
|
|
438
480
|
r = self._send("SELECT count() FROM system.databases WHERE name = '%s'" % self.db_name)
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
|
|
3
5
|
from .utils import comma_join, get_subclass_names
|
|
@@ -5,7 +7,7 @@ from .utils import comma_join, get_subclass_names
|
|
|
5
7
|
logger = logging.getLogger("clickhouse_orm")
|
|
6
8
|
|
|
7
9
|
|
|
8
|
-
class Engine
|
|
10
|
+
class Engine:
|
|
9
11
|
def create_table_sql(self, db):
|
|
10
12
|
raise NotImplementedError() # pragma: no cover
|
|
11
13
|
|
|
@@ -44,9 +46,9 @@ class MergeTree(Engine):
|
|
|
44
46
|
list,
|
|
45
47
|
tuple,
|
|
46
48
|
), "partition_key must be tuple or list if present"
|
|
47
|
-
assert (replica_table_path is None) == (
|
|
48
|
-
replica_name
|
|
49
|
-
)
|
|
49
|
+
assert (replica_table_path is None) == (replica_name is None), (
|
|
50
|
+
"both replica_table_path and replica_name must be specified"
|
|
51
|
+
)
|
|
50
52
|
|
|
51
53
|
# These values conflict with each other (old and new syntax of table engines).
|
|
52
54
|
# So let's make sure only one of them is given.
|
|
@@ -145,7 +147,7 @@ class CollapsingMergeTree(MergeTree):
|
|
|
145
147
|
partition_key=None,
|
|
146
148
|
primary_key=None,
|
|
147
149
|
):
|
|
148
|
-
super(
|
|
150
|
+
super().__init__(
|
|
149
151
|
date_col,
|
|
150
152
|
order_by,
|
|
151
153
|
sampling_expr,
|
|
@@ -158,7 +160,7 @@ class CollapsingMergeTree(MergeTree):
|
|
|
158
160
|
self.sign_col = sign_col
|
|
159
161
|
|
|
160
162
|
def _build_sql_params(self, db):
|
|
161
|
-
params = super(
|
|
163
|
+
params = super()._build_sql_params(db)
|
|
162
164
|
params.append(self.sign_col)
|
|
163
165
|
return params
|
|
164
166
|
|
|
@@ -176,7 +178,7 @@ class SummingMergeTree(MergeTree):
|
|
|
176
178
|
partition_key=None,
|
|
177
179
|
primary_key=None,
|
|
178
180
|
):
|
|
179
|
-
super(
|
|
181
|
+
super().__init__(
|
|
180
182
|
date_col,
|
|
181
183
|
order_by,
|
|
182
184
|
sampling_expr,
|
|
@@ -190,7 +192,7 @@ class SummingMergeTree(MergeTree):
|
|
|
190
192
|
self.summing_cols = summing_cols
|
|
191
193
|
|
|
192
194
|
def _build_sql_params(self, db):
|
|
193
|
-
params = super(
|
|
195
|
+
params = super()._build_sql_params(db)
|
|
194
196
|
if self.summing_cols:
|
|
195
197
|
params.append("(%s)" % comma_join(self.summing_cols))
|
|
196
198
|
return params
|
|
@@ -209,7 +211,7 @@ class ReplacingMergeTree(MergeTree):
|
|
|
209
211
|
partition_key=None,
|
|
210
212
|
primary_key=None,
|
|
211
213
|
):
|
|
212
|
-
super(
|
|
214
|
+
super().__init__(
|
|
213
215
|
date_col,
|
|
214
216
|
order_by,
|
|
215
217
|
sampling_expr,
|
|
@@ -222,7 +224,7 @@ class ReplacingMergeTree(MergeTree):
|
|
|
222
224
|
self.ver_col = ver_col
|
|
223
225
|
|
|
224
226
|
def _build_sql_params(self, db):
|
|
225
|
-
params = super(
|
|
227
|
+
params = super()._build_sql_params(db)
|
|
226
228
|
if self.ver_col:
|
|
227
229
|
params.append(self.ver_col)
|
|
228
230
|
return params
|
|
@@ -332,7 +334,7 @@ class Distributed(Engine):
|
|
|
332
334
|
|
|
333
335
|
def _build_sql_params(self, db):
|
|
334
336
|
if self.table_name is None:
|
|
335
|
-
raise ValueError("Cannot create {} engine: specify an underlying table"
|
|
337
|
+
raise ValueError(f"Cannot create {self.__class__.__name__} engine: specify an underlying table")
|
|
336
338
|
|
|
337
339
|
params = ["`%s`" % p for p in [self.cluster, db.db_name, self.table_name]]
|
|
338
340
|
if self.sharding_key:
|