native-dumper 0.3.5.2__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- native_dumper/__init__.py +33 -0
- native_dumper/common/__init__.py +58 -0
- native_dumper/common/columns.py +30 -0
- native_dumper/common/connector.py +18 -0
- native_dumper/common/cursor.py +201 -0
- native_dumper/common/defines.py +6 -0
- native_dumper/common/diagram.py +78 -0
- native_dumper/common/errors.py +18 -0
- native_dumper/common/logger.py +70 -0
- native_dumper/common/multiquery.py +34 -0
- native_dumper/common/pyo3http/Cargo.toml +15 -0
- native_dumper/common/pyo3http/src/lib.rs +669 -0
- native_dumper/common/pyo3http.cp313-win_amd64.pyd +0 -0
- native_dumper/common/pyo3http.pyi +264 -0
- native_dumper/common/writer.py +11 -0
- native_dumper/dumper.py +513 -0
- native_dumper/version.py +1 -0
- native_dumper-0.3.5.2.dist-info/METADATA +198 -0
- native_dumper-0.3.5.2.dist-info/RECORD +22 -0
- native_dumper-0.3.5.2.dist-info/WHEEL +5 -0
- native_dumper-0.3.5.2.dist-info/licenses/LICENSE +21 -0
- native_dumper-0.3.5.2.dist-info/top_level.txt +1 -0
native_dumper/dumper.py
ADDED
@@ -0,0 +1,513 @@

from collections import OrderedDict
from gc import collect
from io import (
    BufferedReader,
    BufferedWriter,
)
from logging import Logger
from types import MethodType
from typing import (
    Any,
    BinaryIO,
    Iterable,
    Union,
)

from light_compressor import (
    CompressionMethod,
    auto_detector,
    define_reader,
    define_writer,
)
from nativelib import (
    NativeReader,
    NativeWriter,
)
from pandas import DataFrame as PdFrame
from polars import DataFrame as PlFrame
from sqlparse import format as sql_format

from .common import (
    CHUNK_SIZE,
    DBMS_DEFAULT_TIMEOUT_SEC,
    CHConnector,
    ClickhouseServerError,
    DBMetadata,
    DumperLogger,
    HTTPCursor,
    NativeDumperError,
    NativeDumperReadError,
    NativeDumperValueError,
    NativeDumperWriteError,
    chunk_query,
    file_writer,
    make_columns,
    transfer_diagram,
)


class NativeDumper:
    """Class for reading and writing the Native format."""

    def __init__(
        self,
        connector: CHConnector,
        compression_method: CompressionMethod = CompressionMethod.ZSTD,
        logger: Logger | None = None,
        timeout: int = DBMS_DEFAULT_TIMEOUT_SEC,
    ) -> None:
        """Class initialization."""

        if not logger:
            logger = DumperLogger()

        try:
            self.connector = connector

            if int(self.connector.port) == 9000:
                raise ValueError(
                    "NativeDumper does not support port 9000; please use 8123."
                )

            self.compression_method = compression_method
            self.logger = logger
            self.cursor = HTTPCursor(
                connector=self.connector,
                compression_method=self.compression_method,
                logger=self.logger,
                timeout=timeout,
                user_agent=self.__class__.__name__,
            )
            self.version = self.cursor.send_hello()
            self._dbmeta: DBMetadata | None = None
        except ClickhouseServerError as error:
            raise error
        except Exception as error:
            logger.error(f"NativeDumperError: {error}")
            raise NativeDumperError(error)

        self.dbname = "clickhouse"
        self.logger.info(
            f"NativeDumper initialized for host {self.connector.host} "
            f"[{self.dbname} {self.version}]"
        )

    @staticmethod
    def multiquery(dump_method: MethodType):
        """Multiquery decorator."""

        def wrapper(*args, **kwargs):

            first_part: list[str]
            second_part: list[str]

            self: NativeDumper = args[0]
            cursor: HTTPCursor = (kwargs.get("dumper_src") or self).cursor
            query: str = kwargs.get("query_src") or kwargs.get("query")
            part: int = 1
            # chunk_query splits the formatted script into statements to run
            # before and after the streamed statement.
            first_part, second_part = chunk_query(self.query_formatter(query))
            total_parts = len(sum((first_part, second_part), [])) + int(
                bool(kwargs.get("table_name") or kwargs.get("table_src"))
            )

            if len(first_part) > 1:
                for query in first_part:
                    self.logger.info(f"Execute query {part}/{total_parts}")
                    cursor.execute(query)
                    part += 1

            if second_part:
                for key in ("query", "query_src"):
                    if key in kwargs:
                        kwargs[key] = second_part.pop(0)
                        break

            self.logger.info(
                f"Execute stream {part}/{total_parts} [native mode]"
            )
            output = dump_method(*args, **kwargs)

            if second_part:
                for query in second_part:
                    part += 1
                    self.logger.info(f"Execute query {part}/{total_parts}")
                    cursor.execute(query)

            if output:
                self.refresh()

            collect()
            return output

        return wrapper

    def query_formatter(self, query: str) -> str | None:
        """Reformat a query."""

        if not query:
            return
        return sql_format(sql=query, strip_comments=True).strip().strip(";")

    @multiquery
    def __read_dump(
        self,
        fileobj: BufferedWriter,
        query: str | None,
        table_name: str | None,
    ) -> bool:
        """Internal read_dump method that generates kwargs for the decorator."""

        if not query and not table_name:
            error_message = "Query or table name not defined."
            self.logger.error(f"NativeDumperValueError: {error_message}")
            raise NativeDumperValueError(error_message)

        if not query:
            query = f"SELECT * FROM {table_name}"

        self.logger.info(f"Start read from {self.connector.host}.")

        try:
            self.logger.info(
                "Reading native dump with compression "
                f"{self.compression_method.name}."
            )
            columns = make_columns(self.cursor.metadata(f"({query}\n)"))
            source = DBMetadata(
                name=self.dbname,
                version=self.version,
                columns=columns,
            )
            destination = DBMetadata(
                name="file",
                version=fileobj.name,
                columns=columns,
            )
            self.logger.info(transfer_diagram(source, destination))
            stream = self.cursor.get_response(query)
            size = 0

            while chunk := stream.read(CHUNK_SIZE):
                size += fileobj.write(chunk)
                del chunk

            stream.close()
            fileobj.close()
            self.logger.info(f"Successfully read {size} bytes.")

            if not size:
                self.logger.warning("Empty data read!")

            self.logger.info(f"Read from {self.connector.host} done.")
            return True
        except ClickhouseServerError as error:
            raise error
        except Exception as error:
            self.logger.error(f"NativeDumperReadError: {error}")
            raise NativeDumperReadError(error)

    @multiquery
    def __write_between(
        self,
        table_dest: str,
        table_src: str | None,
        query_src: str | None,
        dumper_src: Union["NativeDumper", object],
    ) -> bool | None:
        """Internal write_between method that generates kwargs for the decorator."""

        if not query_src and not table_src:
            error_message = "Source query or table name not defined."
            self.logger.error(f"NativeDumperValueError: {error_message}")
            raise NativeDumperValueError(error_message)

        if not table_dest:
            error_message = "Destination table name not defined."
            self.logger.error(f"NativeDumperValueError: {error_message}")
            raise NativeDumperValueError(error_message)

        if not dumper_src:
            cursor = HTTPCursor(
                connector=self.connector,
                compression_method=self.compression_method,
                logger=self.logger,
                timeout=self.cursor.timeout,
            )
            src_dbname = self.dbname
            src_version = self.version
            self.logger.info(
                f"Set new connection for host {self.connector.host}."
            )
        elif dumper_src.__class__ is NativeDumper:
            cursor = dumper_src.cursor
            src_dbname = dumper_src.dbname
            src_version = dumper_src.version
        else:
            if query_src:
                query_src = query_src.strip().strip(";")

            reader = dumper_src.to_reader(
                query=query_src,
                table_name=table_src,
            )
            dtype_data = reader.to_rows()
            self.from_rows(
                dtype_data=dtype_data,
                table_name=table_dest,
                source=dumper_src._dbmeta,
            )
            size = reader.tell()
            self.logger.info(f"Successfully sent {size} bytes.")

            if not size:
                self.logger.warning("Empty data sent!")

            return reader.close()

        if not query_src:
            query_src = f"SELECT * FROM {table_src}"
        else:
            query_src = query_src.strip().strip(";")

        source = DBMetadata(
            name=src_dbname,
            version=src_version,
            columns=make_columns(cursor.metadata(f"({query_src})")),
        )
        destination = DBMetadata(
            name=self.dbname,
            version=self.version,
            columns=make_columns(self.cursor.metadata(table_dest)),
        )
        self.logger.info(transfer_diagram(source, destination))
        stream = cursor.get_response(query_src)
        self.write_dump(stream, table_dest, cursor.compression_method)

    @multiquery
    def __to_reader(
        self,
        query: str | None,
        table_name: str | None,
    ) -> NativeReader:
        """Internal to_reader method that generates kwargs for the decorator."""

        if not query and not table_name:
            error_message = "Query or table name not defined."
            self.logger.error(f"NativeDumperValueError: {error_message}")
            raise NativeDumperValueError(error_message)

        if not query:
            query = f"SELECT * FROM {table_name}"

        self.logger.info(
            f"Get NativeReader object from {self.connector.host}."
        )
        self._dbmeta = DBMetadata(
            name=self.dbname,
            version=self.version,
            columns=make_columns(self.cursor.metadata(f"({query}\n)")),
        )
        return self.cursor.get_stream(query)

    def read_dump(
        self,
        fileobj: BufferedWriter,
        query: str | None = None,
        table_name: str | None = None,
    ) -> bool:
        """Read a Native dump from ClickHouse."""

        return self.__read_dump(
            fileobj=fileobj,
            query=query,
            table_name=table_name,
        )

    def write_dump(
        self,
        fileobj: BufferedReader | BinaryIO,
        table_name: str,
        compression_method: CompressionMethod | None = None,
    ) -> None:
        """Write a Native dump into ClickHouse."""

        if not table_name:
            error_message = "Table name not defined."
            self.logger.error(f"NativeDumperValueError: {error_message}")
            raise NativeDumperValueError(error_message)

        self.logger.info(
            f"Start write into {self.connector.host}.{table_name}."
        )

        try:
            if not compression_method:
                compression_method = auto_detector(fileobj)

            # Re-encode only when the file's compression differs from the
            # session's compression method.
            if compression_method != self.compression_method:
                reader = define_reader(fileobj, compression_method)
                data = define_writer(
                    file_writer(reader),
                    self.compression_method,
                )
            else:
                reader = fileobj
                data = file_writer(reader)

            self.cursor.upload_data(
                table=table_name,
                data=data,
            )
            collect()
            size = reader.tell()
            self.logger.info(f"Successfully sent {size} bytes.")

            if not size:
                self.logger.warning("Empty data sent!")

            reader.close()
        except ClickhouseServerError as error:
            raise error
        except Exception as error:
            self.logger.error(f"NativeDumperWriteError: {error}")
            raise NativeDumperWriteError(error)

        self.logger.info(
            f"Write into {self.connector.host}.{table_name} done."
        )
        self.refresh()

    def write_between(
        self,
        table_dest: str,
        table_src: str | None = None,
        query_src: str | None = None,
        dumper_src: Union["NativeDumper", object, None] = None,
    ) -> bool | None:
        """Write between ClickHouse servers."""

        return self.__write_between(
            table_dest=table_dest,
            table_src=table_src,
            query_src=query_src,
            dumper_src=dumper_src,
        )

    def to_reader(
        self,
        query: str | None = None,
        table_name: str | None = None,
    ) -> NativeReader:
        """Get a stream from ClickHouse as a NativeReader object."""

        return self.__to_reader(
            query=query,
            table_name=table_name,
        )

    def from_rows(
        self,
        dtype_data: Iterable[Any],
        table_name: str,
        source: DBMetadata | None = None,
    ) -> None:
        """Write rows from a Python iterable into a ClickHouse table."""

        if not table_name:
            error_message = "Table name not defined."
            self.logger.error(f"NativeDumperValueError: {error_message}")
            raise NativeDumperValueError(error_message)

        if not source:
            source = DBMetadata(
                name="python",
                version="iterable object",
                columns={"Unknown": "Unknown"},
            )

        column_list = self.cursor.metadata(table_name)
        writer = NativeWriter(column_list)
        data = define_writer(
            writer.from_rows(dtype_data),
            self.compression_method,
        )

        destination = DBMetadata(
            name=self.dbname,
            version=self.version,
            columns=make_columns(column_list),
        )

        self.logger.info(transfer_diagram(source, destination))
        collect()
        self.logger.info(
            f"Start write into {self.connector.host}.{table_name}."
        )

        try:
            self.cursor.upload_data(
                table=table_name,
                data=data,
            )
        except ClickhouseServerError as error:
            raise error
        except Exception as error:
            self.logger.error(f"NativeDumperWriteError: {error}")
            raise NativeDumperWriteError(error)

        self.logger.info(
            f"Write into {self.connector.host}.{table_name} done."
        )
        self.refresh()

    def from_pandas(
        self,
        data_frame: PdFrame,
        table_name: str,
    ) -> None:
        """Write a pandas.DataFrame into a ClickHouse table."""

        self.from_rows(
            dtype_data=iter(data_frame.values),
            table_name=table_name,
            source=DBMetadata(
                name="pandas",
                version="DataFrame",
                columns=OrderedDict(zip(
                    data_frame.columns,
                    [str(dtype) for dtype in data_frame.dtypes],
                )),
            ),
        )

    def from_polars(
        self,
        data_frame: PlFrame,
        table_name: str,
    ) -> None:
        """Write a polars.DataFrame into a ClickHouse table."""

        self.from_rows(
            dtype_data=data_frame.iter_rows(),
            table_name=table_name,
            source=DBMetadata(
                name="polars",
                version="DataFrame",
                columns=OrderedDict(zip(
                    data_frame.columns,
                    [str(dtype) for dtype in data_frame.dtypes],
                )),
            ),
        )

    def refresh(self) -> None:
        """Refresh the session."""

        self.cursor.refresh()
        self.logger.info(f"Connection to host {self.connector.host} updated.")

    def close(self) -> None:
        """Close the session."""

        self.cursor.close()
        self.logger.info(f"Connection to host {self.connector.host} closed.")
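A note on the multiquery decorator above: dump methods are wrapped so that a multi-statement SQL script can be passed as a single query. The exact split logic lives in chunk_query (native_dumper/common/multiquery.py, not shown in this diff); judging by the wrapper, surrounding statements are run via cursor.execute while one data-producing statement is streamed. A minimal, hedged sketch of what that presumably permits; the table and file names are hypothetical:

```python
# Hypothetical multi-statement script: the setup statement is presumably
# executed separately, and the SELECT is streamed into the file.
with open("dump.zstd", "wb") as fileobj:
    dumper.read_dump(
        fileobj,
        query=(
            "CREATE TEMPORARY TABLE tmp_example AS SELECT 1 AS x;"
            "SELECT * FROM tmp_example;"
        ),
    )
```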
native_dumper/version.py
ADDED
@@ -0,0 +1 @@

__version__ = "0.3.5.2"
native_dumper-0.3.5.2.dist-info/METADATA
ADDED
@@ -0,0 +1,198 @@

Metadata-Version: 2.4
Name: native-dumper
Version: 0.3.5.2
Summary: Library for reading and writing the Native format between ClickHouse and files.
Home-page: https://0xmihalich.github.io/dbhose_airflow/classes/native_dumper/index.html
Author: 0xMihalich
Author-email: bayanmobile87@gmail.com
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Requires-Python: >=3.10
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: light-compressor==0.0.2.2
Requires-Dist: nativelib==0.2.2.6
Requires-Dist: sqlparse>=0.5.5
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: description
Dynamic: description-content-type
Dynamic: home-page
Dynamic: license-file
Dynamic: requires-dist
Dynamic: requires-python
Dynamic: summary

# NativeDumper

Library for reading and writing the Native format between ClickHouse and files.

## Examples

### Initialization

```python
from native_dumper import (
    CompressionMethod,
    CHConnector,
    NativeDumper,
)

connector = CHConnector(
    host=<your host>,
    dbname=<your database>,
    user=<your username>,
    password=<your password>,
    port=8123,
)

dumper = NativeDumper(
    connector=connector,
    compression_method=CompressionMethod.ZSTD,  # or CompressionMethod.LZ4 or CompressionMethod.NONE
)
```
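The constructor also accepts an optional `logger` and a `timeout` (see `NativeDumper.__init__` in dumper.py above). A minimal sketch; the logger name and timeout value here are illustrative:

```python
import logging

logger = logging.getLogger("my_dumper")  # hypothetical logger name

dumper = NativeDumper(
    connector=connector,
    compression_method=CompressionMethod.ZSTD,
    logger=logger,  # defaults to the built-in DumperLogger
    timeout=600,    # seconds; defaults to DBMS_DEFAULT_TIMEOUT_SEC
)
```

Note that NativeDumper speaks only the HTTP interface: passing the native TCP port 9000 raises a ValueError at initialization.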
### Read dump from ClickHouse into a file

```python
file_name = "native.zstd"
# define either query or table_name
query = "select ..."  # some sql query
table_name = "default.test_table"  # or some table

with open(file_name, "wb") as fileobj:
    dumper.read_dump(
        fileobj,
        query,
        table_name,
    )
```
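When `query` is omitted, the dumper builds `SELECT * FROM <table_name>` itself (see `__read_dump` above), so the table-only form is equivalent:

```python
# Equivalent shortcut: only the table name is given, and the dumper
# issues "SELECT * FROM default.test_table" internally.
with open(file_name, "wb") as fileobj:
    dumper.read_dump(fileobj, table_name=table_name)
```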
### Write dump from a file into ClickHouse

```python
file_name = "native.zstd"
# table_name is required
table_name = "default.test_table"  # some table

with open(file_name, "rb") as fileobj:
    dumper.write_dump(
        fileobj,
        table_name,
    )
```
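`write_dump` auto-detects the file's compression via `auto_detector` when `compression_method` is omitted, and re-encodes only if it differs from the dumper's own method. You can also pass the method explicitly; a sketch assuming a hypothetical LZ4-compressed dump:

```python
# Declaring the compression method up front skips auto-detection.
with open("native.lz4", "rb") as fileobj:
    dumper.write_dump(
        fileobj,
        table_name,
        compression_method=CompressionMethod.LZ4,
    )
```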
### Write from ClickHouse into ClickHouse

Same server

```python
table_dest = "default.test_table_write"  # some table for write
table_src = "default.test_table_read"  # some table for read
query_src = "select ..."  # or some sql query for read

dumper.write_between(
    table_dest,
    table_src,
    query_src,
)
```

Different servers

```python
connector_src = CHConnector(
    host=<host src>,
    dbname=<database src>,
    user=<username src>,
    password=<password src>,
    port=8123,
)

dumper_src = NativeDumper(connector=connector_src)

table_dest = "default.test_table_write"  # some table for write
table_src = "default.test_table_read"  # some table for read
query_src = "select ..."  # or some sql query for read

dumper.write_between(
    table_dest,
    table_src,
    query_src,
    dumper_src,
)
```
### Get a NativeReader object from a stream

```python
table_name = "default.test_table_read"  # some table for read
query = "select ..."  # or some sql query for read

reader = dumper.to_reader(
    query=query,
    table_name=table_name,
)
```

NativeReader exposes three read methods, but only one of them can be used per session.

```python
# read as a python generator object
reader.to_rows()
# or read as a pandas.DataFrame
reader.to_pandas()
# or read as a polars.DataFrame
reader.to_polars()
```
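A reader can also feed another table directly through `from_rows`; this mirrors what `write_between` does internally for third-party dumpers. A minimal sketch, reusing `dumper_src` from the "Different servers" example above with illustrative table names:

```python
# Stream rows from the source server into the destination table
# via the generator form of the reader.
reader = dumper_src.to_reader(table_name="default.test_table_read")
dumper.from_rows(
    dtype_data=reader.to_rows(),
    table_name="default.test_table_write",
)
reader.close()
```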
### Write from Python objects into a target table

```python
# some table to write data into
table_name = "default.test_table_write"
dtype_data: Iterable[Any]
pandas_frame: pandas.DataFrame
polars_frame: polars.DataFrame

# write from a python object
dumper.from_rows(dtype_data, table_name)
# write from a pandas.DataFrame
dumper.from_pandas(pandas_frame, table_name)
# write from a polars.DataFrame
dumper.from_polars(polars_frame, table_name)
```
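`from_rows` accepts any iterable of row tuples; the values must line up with the target table's column order and types, which the dumper reads from the server via `cursor.metadata`. A sketch with a hypothetical two-column table (`id UInt32, name String`):

```python
# Hypothetical rows matching a table defined as (id UInt32, name String).
dtype_data = [
    (1, "alpha"),
    (2, "beta"),
]
dumper.from_rows(dtype_data, "default.test_table_write")
```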
### Open Native file format

For reading the Native file format itself, see my other repository: https://github.com/0xMihalich/nativelib

## Installation

### From pip

```bash
pip install native-dumper
```

### From local directory

```bash
pip install .
```

### From git

```bash
pip install git+https://github.com/0xMihalich/native_dumper
```