clickhouse-orm 2.2.2__tar.gz → 3.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,91 @@
1
+ Metadata-Version: 2.4
2
+ Name: clickhouse_orm
3
+ Version: 3.1.0
4
+ Summary: A simple ORM for working with the Clickhouse database. Maintenance fork of infi.clickhouse_orm.
5
+ Author-email: Oliver Margetts <oliver.margetts@gmail.com>
6
+ Description-Content-Type: text/markdown
7
+ Classifier: Intended Audience :: Developers
8
+ Classifier: Intended Audience :: System Administrators
9
+ Classifier: License :: OSI Approved :: BSD License
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Programming Language :: Python
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Programming Language :: Python :: 3.14
16
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
17
+ Classifier: Topic :: Database
18
+ License-File: LICENSE
19
+ Requires-Dist: requests
20
+ Requires-Dist: pytz
21
+ Requires-Dist: docker==7.1.0 ; extra == "dev"
22
+ Requires-Dist: pytest==9.0.2 ; extra == "dev"
23
+ Requires-Dist: ruff==0.14.14 ; extra == "dev"
24
+ Project-URL: Homepage, https://github.com/SuadeLabs/clickhouse_orm
25
+ Project-URL: Repository, https://github.com/SuadeLabs/clickhouse_orm
26
+ Provides-Extra: dev
27
+
28
+ A fork of [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm) aimed at more frequent maintenance and bugfixes.
29
+
30
+ [![Tests](https://github.com/SuadeLabs/clickhouse_orm/actions/workflows/python-test.yml/badge.svg)](https://github.com/SuadeLabs/clickhouse_orm/actions/workflows/python-test.yml)
31
+ ![PyPI](https://img.shields.io/pypi/v/clickhouse_orm)
32
+
33
+ Introduction
34
+ ============
35
+
36
+ This project is a simple ORM for working with the [ClickHouse database](https://clickhouse.yandex/).
37
+ It allows you to define model classes whose instances can be written to the database and read from it.
38
+
39
+ Let's jump right in with a simple example of monitoring CPU usage. First we need to define the model class,
40
+ connect to the database and create a table for the model:
41
+
42
+ ```python
43
+ from clickhouse_orm import Database, Model, DateTimeField, UInt16Field, Float32Field, Memory, F
44
+
45
+ class CPUStats(Model):
46
+
47
+ timestamp = DateTimeField()
48
+ cpu_id = UInt16Field()
49
+ cpu_percent = Float32Field()
50
+
51
+ engine = Memory()
52
+
53
+ db = Database('demo')
54
+ db.create_table(CPUStats)
55
+ ```
56
+
57
+ Now we can collect usage statistics per CPU, and write them to the database:
58
+
59
+ ```python
60
+ import psutil, time, datetime
61
+
62
+ psutil.cpu_percent(percpu=True) # first sample should be discarded
63
+ with db.session(): # use a requests session for efficiency
64
+ while True:
65
+ time.sleep(1)
66
+ stats = psutil.cpu_percent(percpu=True)
67
+ timestamp = datetime.datetime.now()
68
+ db.insert([
69
+ CPUStats(timestamp=timestamp, cpu_id=cpu_id, cpu_percent=cpu_percent)
70
+ for cpu_id, cpu_percent in enumerate(stats)
71
+ ])
72
+ ```
73
+
74
+ Querying the table is easy, using either the query builder or raw SQL:
75
+
76
+ ```python
77
+ # Calculate what percentage of the time CPU 1 was over 95% busy
78
+ queryset = CPUStats.objects_in(db)
79
+ total = queryset.filter(CPUStats.cpu_id == 1).count()
80
+ busy = queryset.filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count()
81
+ print('CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total))
82
+
83
+ # Calculate the average usage per CPU
84
+ for row in queryset.aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)):
85
+ print('CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row))
86
+ ```
87
+
88
+ This and other examples can be found in the `examples` folder.
89
+
90
+ To learn more please visit the [documentation](docs/toc.md).
91
+
@@ -0,0 +1,63 @@
1
+ A fork of [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm) aimed at more frequent maintenance and bugfixes.
2
+
3
+ [![Tests](https://github.com/SuadeLabs/clickhouse_orm/actions/workflows/python-test.yml/badge.svg)](https://github.com/SuadeLabs/clickhouse_orm/actions/workflows/python-test.yml)
4
+ ![PyPI](https://img.shields.io/pypi/v/clickhouse_orm)
5
+
6
+ Introduction
7
+ ============
8
+
9
+ This project is a simple ORM for working with the [ClickHouse database](https://clickhouse.yandex/).
10
+ It allows you to define model classes whose instances can be written to the database and read from it.
11
+
12
+ Let's jump right in with a simple example of monitoring CPU usage. First we need to define the model class,
13
+ connect to the database and create a table for the model:
14
+
15
+ ```python
16
+ from clickhouse_orm import Database, Model, DateTimeField, UInt16Field, Float32Field, Memory, F
17
+
18
+ class CPUStats(Model):
19
+
20
+ timestamp = DateTimeField()
21
+ cpu_id = UInt16Field()
22
+ cpu_percent = Float32Field()
23
+
24
+ engine = Memory()
25
+
26
+ db = Database('demo')
27
+ db.create_table(CPUStats)
28
+ ```
29
+
30
+ Now we can collect usage statistics per CPU, and write them to the database:
31
+
32
+ ```python
33
+ import psutil, time, datetime
34
+
35
+ psutil.cpu_percent(percpu=True) # first sample should be discarded
36
+ with db.session(): # use a requests session for efficiency
37
+ while True:
38
+ time.sleep(1)
39
+ stats = psutil.cpu_percent(percpu=True)
40
+ timestamp = datetime.datetime.now()
41
+ db.insert([
42
+ CPUStats(timestamp=timestamp, cpu_id=cpu_id, cpu_percent=cpu_percent)
43
+ for cpu_id, cpu_percent in enumerate(stats)
44
+ ])
45
+ ```
46
+
47
+ Querying the table is easy, using either the query builder or raw SQL:
48
+
49
+ ```python
50
+ # Calculate what percentage of the time CPU 1 was over 95% busy
51
+ queryset = CPUStats.objects_in(db)
52
+ total = queryset.filter(CPUStats.cpu_id == 1).count()
53
+ busy = queryset.filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count()
54
+ print('CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total))
55
+
56
+ # Calculate the average usage per CPU
57
+ for row in queryset.aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)):
58
+ print('CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row))
59
+ ```
60
+
61
+ This and other examples can be found in the `examples` folder.
62
+
63
+ To learn more please visit the [documentation](docs/toc.md).
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  from inspect import isclass
2
4
 
3
5
  from .database import * # noqa: F401, F403
@@ -1,6 +1,9 @@
1
+ from __future__ import annotations
2
+
1
3
  import datetime
2
4
  import logging
3
5
  import re
6
+ from contextlib import contextmanager
4
7
  from math import ceil
5
8
  from string import Template
6
9
 
@@ -13,13 +16,11 @@ from .utils import Page, import_submodules, parse_tsv
13
16
  logger = logging.getLogger("clickhouse_orm")
14
17
 
15
18
 
16
- class DatabaseException(Exception):
19
+ class DatabaseException(Exception): # noqa: N818
17
20
  """
18
21
  Raised when a database operation fails.
19
22
  """
20
23
 
21
- pass
22
-
23
24
 
24
25
  class ServerError(DatabaseException):
25
26
  """
@@ -35,7 +36,7 @@ class ServerError(DatabaseException):
35
36
  # just skip custom init
36
37
  # if non-standard message format
37
38
  self.message = message
38
- super(ServerError, self).__init__(message)
39
+ super().__init__(message)
39
40
 
40
41
  ERROR_PATTERNS = (
41
42
  # ClickHouse prior to v19.3.3
@@ -55,6 +56,14 @@ class ServerError(DatabaseException):
55
56
  """,
56
57
  re.VERBOSE | re.DOTALL,
57
58
  ),
59
+ # ClickHouse v21+
60
+ re.compile(
61
+ r"""
62
+ Code:\ (?P<code>\d+).
63
+ \ (?P<type1>[^ \n]+):\ (?P<msg>.+)
64
+ """,
65
+ re.VERBOSE | re.DOTALL,
66
+ ),
58
67
  )
59
68
 
60
69
  @classmethod
@@ -75,19 +84,21 @@ class ServerError(DatabaseException):
75
84
 
76
85
  def __str__(self):
77
86
  if self.code is not None:
78
- return "{} ({})".format(self.message, self.code)
87
+ return f"{self.message} ({self.code})"
79
88
 
80
89
 
81
- class Database(object):
90
+ class Database:
82
91
  """
83
92
  Database instances connect to a specific ClickHouse database for running queries,
84
93
  inserting data and other operations.
85
94
  """
86
95
 
96
+ _default_url = "http://localhost:8123/"
97
+
87
98
  def __init__(
88
99
  self,
89
100
  db_name,
90
- db_url="http://localhost:8123/",
101
+ db_url=None,
91
102
  username=None,
92
103
  password=None,
93
104
  readonly=False,
@@ -95,6 +106,7 @@ class Database(object):
95
106
  timeout=60,
96
107
  verify_ssl_cert=True,
97
108
  log_statements=False,
109
+ session=None,
98
110
  ):
99
111
  """
100
112
  Initializes a database instance. Unless it's readonly, the database will be
@@ -111,13 +123,14 @@ class Database(object):
111
123
  - `log_statements`: when True, all database statements are logged.
112
124
  """
113
125
  self.db_name = db_name
114
- self.db_url = db_url
115
- self.readonly = False
126
+ self.db_url = db_url or self._default_url
127
+ self.readonly = self.connection_readonly = False
116
128
  self.timeout = timeout
117
- self.request_session = requests.Session()
118
- self.request_session.verify = verify_ssl_cert
119
- if username:
120
- self.request_session.auth = (username, password or "")
129
+ self.verify_ssl_cert = verify_ssl_cert
130
+ self.request_session = None
131
+ self.__username = username
132
+ self.__password = password
133
+
121
134
  self.log_statements = log_statements
122
135
  self.settings = {}
123
136
  self.db_exists = False # this is required before running _is_existing_database
@@ -137,6 +150,22 @@ class Database(object):
137
150
  # Version 19.0 and above support LowCardinality
138
151
  self.has_low_cardinality_support = self.server_version >= (19, 0)
139
152
 
153
+ @contextmanager
154
+ def session(self):
155
+ """Contextmanager to use a persistent session for requests.
156
+
157
+ This can be quicker if making lots of small queries.
158
+ """
159
+ with requests.Session() as session:
160
+ session.verify = self.verify_ssl_cert
161
+ if self.__username:
162
+ session.auth = (self.__username, self.__password or "")
163
+ self.request_session = session
164
+ try:
165
+ yield self
166
+ finally:
167
+ self.request_session = None
168
+
140
169
  def create_database(self):
141
170
  """
142
171
  Creates the database on the ClickHouse server if it does not already exist.
@@ -388,7 +417,20 @@ class Database(object):
388
417
  if self.log_statements:
389
418
  logger.info(data)
390
419
  params = self._build_params(settings)
391
- r = self.request_session.post(self.db_url, params=params, data=data, stream=stream, timeout=self.timeout)
420
+
421
+ if self.request_session:
422
+ r = self.request_session.post(self.db_url, params=params, data=data, stream=stream, timeout=self.timeout)
423
+ else:
424
+ r = requests.post(
425
+ self.db_url,
426
+ params=params,
427
+ data=data,
428
+ stream=stream,
429
+ timeout=self.timeout,
430
+ verify=self.verify_ssl_cert,
431
+ auth=(self.__username, self.__password or "") if self.__username else None,
432
+ )
433
+
392
434
  if r.status_code != 200:
393
435
  raise ServerError(r.text)
394
436
  return r
@@ -432,7 +474,7 @@ class Database(object):
432
474
  except ServerError as e:
433
475
  logger.exception("Cannot determine server version (%s), assuming 1.1.0", e)
434
476
  ver = "1.1.0"
435
- return tuple(int(n) for n in ver.split(".")) if as_tuple else ver
477
+ return tuple(int(n) for n in ver.split(".") if n.isdigit()) if as_tuple else ver
436
478
 
437
479
  def _is_existing_database(self):
438
480
  r = self._send("SELECT count() FROM system.databases WHERE name = '%s'" % self.db_name)
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
 
3
5
  from .utils import comma_join, get_subclass_names
@@ -5,7 +7,7 @@ from .utils import comma_join, get_subclass_names
5
7
  logger = logging.getLogger("clickhouse_orm")
6
8
 
7
9
 
8
- class Engine(object):
10
+ class Engine:
9
11
  def create_table_sql(self, db):
10
12
  raise NotImplementedError() # pragma: no cover
11
13
 
@@ -44,9 +46,9 @@ class MergeTree(Engine):
44
46
  list,
45
47
  tuple,
46
48
  ), "partition_key must be tuple or list if present"
47
- assert (replica_table_path is None) == (
48
- replica_name is None
49
- ), "both replica_table_path and replica_name must be specified"
49
+ assert (replica_table_path is None) == (replica_name is None), (
50
+ "both replica_table_path and replica_name must be specified"
51
+ )
50
52
 
51
53
  # These values conflict with each other (old and new syntax of table engines.
52
54
  # So let's control only one of them is given.
@@ -145,7 +147,7 @@ class CollapsingMergeTree(MergeTree):
145
147
  partition_key=None,
146
148
  primary_key=None,
147
149
  ):
148
- super(CollapsingMergeTree, self).__init__(
150
+ super().__init__(
149
151
  date_col,
150
152
  order_by,
151
153
  sampling_expr,
@@ -158,7 +160,7 @@ class CollapsingMergeTree(MergeTree):
158
160
  self.sign_col = sign_col
159
161
 
160
162
  def _build_sql_params(self, db):
161
- params = super(CollapsingMergeTree, self)._build_sql_params(db)
163
+ params = super()._build_sql_params(db)
162
164
  params.append(self.sign_col)
163
165
  return params
164
166
 
@@ -176,7 +178,7 @@ class SummingMergeTree(MergeTree):
176
178
  partition_key=None,
177
179
  primary_key=None,
178
180
  ):
179
- super(SummingMergeTree, self).__init__(
181
+ super().__init__(
180
182
  date_col,
181
183
  order_by,
182
184
  sampling_expr,
@@ -190,7 +192,7 @@ class SummingMergeTree(MergeTree):
190
192
  self.summing_cols = summing_cols
191
193
 
192
194
  def _build_sql_params(self, db):
193
- params = super(SummingMergeTree, self)._build_sql_params(db)
195
+ params = super()._build_sql_params(db)
194
196
  if self.summing_cols:
195
197
  params.append("(%s)" % comma_join(self.summing_cols))
196
198
  return params
@@ -209,7 +211,7 @@ class ReplacingMergeTree(MergeTree):
209
211
  partition_key=None,
210
212
  primary_key=None,
211
213
  ):
212
- super(ReplacingMergeTree, self).__init__(
214
+ super().__init__(
213
215
  date_col,
214
216
  order_by,
215
217
  sampling_expr,
@@ -222,7 +224,7 @@ class ReplacingMergeTree(MergeTree):
222
224
  self.ver_col = ver_col
223
225
 
224
226
  def _build_sql_params(self, db):
225
- params = super(ReplacingMergeTree, self)._build_sql_params(db)
227
+ params = super()._build_sql_params(db)
226
228
  if self.ver_col:
227
229
  params.append(self.ver_col)
228
230
  return params
@@ -332,7 +334,7 @@ class Distributed(Engine):
332
334
 
333
335
  def _build_sql_params(self, db):
334
336
  if self.table_name is None:
335
- raise ValueError("Cannot create {} engine: specify an underlying table".format(self.__class__.__name__))
337
+ raise ValueError(f"Cannot create {self.__class__.__name__} engine: specify an underlying table")
336
338
 
337
339
  params = ["`%s`" % p for p in [self.cluster, db.db_name, self.table_name]]
338
340
  if self.sharding_key: