native-dumper 0.3.5.0__cp310-cp310-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
+ """Library for read and write Native format between Clickhouse and file."""
2
+
3
+ from light_compressor import CompressionMethod
4
+
5
+ from .common import (
6
+ CHConnector,
7
+ ClickhouseServerError,
8
+ DumperLogger,
9
+ HTTPCursor,
10
+ NativeDumperError,
11
+ NativeDumperReadError,
12
+ NativeDumperValueError,
13
+ NativeDumperWriteError,
14
+ )
15
+
16
+ from .dumper import NativeDumper
17
+ from .version import __version__
18
+
19
+
20
+ __all__ = (
21
+ "__version__",
22
+ "CHConnector",
23
+ "ClickhouseServerError",
24
+ "CompressionMethod",
25
+ "DumperLogger",
26
+ "HTTPCursor",
27
+ "NativeDumper",
28
+ "NativeDumperError",
29
+ "NativeDumperReadError",
30
+ "NativeDumperValueError",
31
+ "NativeDumperWriteError",
32
+ )
33
+ __author__ = "0xMihalich"
@@ -0,0 +1,58 @@
1
+ """Common utilities."""
2
+
3
+ from .columns import make_columns
4
+ from .connector import CHConnector
5
+ from .cursor import HTTPCursor
6
+ from .defines import (
7
+ CHUNK_SIZE,
8
+ DBMS_DEFAULT_TIMEOUT_SEC,
9
+ DEFAULT_DATABASE,
10
+ DEFAULT_PASSWORD,
11
+ DEFAULT_PORT,
12
+ DEFAULT_USER,
13
+ )
14
+ from .diagram import (
15
+ DBMetadata,
16
+ format_table,
17
+ transfer_diagram,
18
+ )
19
+ from .errors import (
20
+ ClickhouseServerError,
21
+ NativeDumperError,
22
+ NativeDumperReadError,
23
+ NativeDumperValueError,
24
+ NativeDumperWriteError,
25
+ )
26
+ from .logger import DumperLogger
27
+ from .multiquery import chunk_query
28
+ from .pyo3http import (
29
+ HttpResponse,
30
+ HttpSession,
31
+ )
32
+ from .writer import file_writer
33
+
34
+
35
+ __all__ = (
36
+ "CHUNK_SIZE",
37
+ "DBMS_DEFAULT_TIMEOUT_SEC",
38
+ "DEFAULT_DATABASE",
39
+ "DEFAULT_PASSWORD",
40
+ "DEFAULT_PORT",
41
+ "DEFAULT_USER",
42
+ "CHConnector",
43
+ "ClickhouseServerError",
44
+ "DBMetadata",
45
+ "DumperLogger",
46
+ "HTTPCursor",
47
+ "HttpResponse",
48
+ "HttpSession",
49
+ "NativeDumperError",
50
+ "NativeDumperReadError",
51
+ "NativeDumperValueError",
52
+ "NativeDumperWriteError",
53
+ "chunk_query",
54
+ "file_writer",
55
+ "format_table",
56
+ "make_columns",
57
+ "transfer_diagram",
58
+ )
@@ -0,0 +1,30 @@
1
+ from collections import OrderedDict
2
+
3
+ from nativelib import Column
4
+
5
+
6
def make_columns(
    column_list: list[Column],
) -> OrderedDict[str, str]:
    """Build the DBMetadata.columns mapping: column name -> type string.

    Parametrized ClickHouse types (FixedString, Decimal, DateTime64,
    Enum8/Enum16, Time64) get their parameters rendered in parentheses,
    taken from the column's ``info`` object; every other type name is
    used verbatim.
    """
    # Per-type renderers for the parenthesized parameter suffix.
    suffixes = {
        "FixedString": lambda info: f"({info.length})",
        "Decimal": lambda info: f"({info.precision}, {info.scale})",
        "DateTime64": lambda info: f"({info.precision}, {info.tzinfo})",
        "Enum8": lambda info: f"({info.enumcase})",
        "Enum16": lambda info: f"({info.enumcase})",
        "Time64": lambda info: f"({info.precision})",
    }

    columns: OrderedDict[str, str] = OrderedDict()

    for column in column_list:
        dtype_name = column.dtype.name
        info = column.info
        render = suffixes.get(dtype_name)

        if render is not None:
            dtype_name += render(info)

        columns[column.column] = dtype_name

    return columns
@@ -0,0 +1,18 @@
1
+ from typing import NamedTuple
2
+
3
+ from .defines import (
4
+ DEFAULT_DATABASE,
5
+ DEFAULT_USER,
6
+ DEFAULT_PASSWORD,
7
+ DEFAULT_PORT,
8
+ )
9
+
10
+
11
class CHConnector(NamedTuple):
    """Connection parameters for a ClickHouse server."""

    # Server host name or IP address (no scheme prefix).
    host: str
    # Database name; defaults to DEFAULT_DATABASE ("").
    dbname: str = DEFAULT_DATABASE
    # Credentials; defaults are user "default" with an empty password.
    user: str = DEFAULT_USER
    password: str = DEFAULT_PASSWORD
    # HTTP interface port (8123 by default; HTTPCursor switches the
    # scheme to https only when this is 443).
    port: int = DEFAULT_PORT
@@ -0,0 +1,196 @@
1
+ from typing import Iterable
2
+ from uuid import uuid4
3
+
4
+ from light_compressor import (
5
+ CompressionMethod,
6
+ define_reader,
7
+ )
8
+ from nativelib import (
9
+ Column,
10
+ NativeReader,
11
+ )
12
+
13
+ from ..version import __version__
14
+ from .connector import CHConnector
15
+ from .defines import CHUNK_SIZE
16
+ from .errors import ClickhouseServerError
17
+ from .logger import Logger
18
+ from .pyo3http import (
19
+ HttpResponse,
20
+ HttpSession,
21
+ )
22
+
23
+
24
def string_error(data: bytes) -> str:
    """Decode raw error bytes to text.

    Uses UTF-8 with undecodable bytes replaced (U+FFFD), then trims
    surrounding whitespace.
    """
    decoded = data.decode("utf-8", errors="replace")
    return decoded.strip()
28
+
29
+
30
class HTTPCursor:
    """Send queries to a ClickHouse server over its HTTP interface
    and read/write the Native format (optionally compressed)."""

    def __init__(
        self,
        connector: CHConnector,
        compression_method: CompressionMethod,
        logger: Logger,
        timeout: int,
    ) -> None:
        """Set up the HTTP session, headers and request parameters.

        Args:
            connector: host/port/credentials of the server.
            compression_method: codec advertised for both request and
                response bodies.
            logger: logger used to report server errors.
            timeout: request timeout (seconds), also applied to the
                underlying HttpSession.
        """

        self.connector = connector
        self.compression_method = compression_method
        self.logger = logger
        self.timeout = timeout
        self.session = HttpSession(timeout=self.timeout)
        # Set to True after the first successful handshake; controls
        # how error bodies are decoded in get_response().
        self.is_connected = False
        # ClickHouse HTTP headers: credentials plus the same codec for
        # both directions; answers are requested in Native format.
        self.headers = {
            "Accept": "*/*",
            "Connection": "keep-alive",
            "User-Agent": f"{self.__class__.__name__}/{__version__}",
            "Accept-Encoding": self.compression_method.method,
            "Content-Encoding": self.compression_method.method,
            "X-ClickHouse-User": self.connector.user,
            "X-ClickHouse-Key": self.connector.password,
            "X-ClickHouse-Compression": self.compression_method.method,
            "X-ClickHouse-Format": "Native",
            "X-Content-Type-Options": "nosniff",
        }
        # Only port 443 selects https; any other port is plain http.
        self.mode = {
            443: "https",
        }.get(int(self.connector.port), "http")
        self.url = (
            f"{self.mode}://{self.connector.host}:{self.connector.port}/"
            "?enable_http_compression=1"
        )
        # One session_id is shared by all requests until refresh().
        self.params = {
            "database": connector.dbname,
            "query": "",
            "session_id": str(uuid4()),
        }
        # Bytes to read when sniffing a stream for an error prefix in
        # get_stream(); uncompressed streams use a wider window.
        self.check_length = {
            CompressionMethod.NONE: 1024,
        }
        # Filled by send_hello().
        self.server_version = None

    def send_hello(self) -> str:
        """Query and remember the server version; marks the cursor
        connected. Returns the version string."""

        reader = self.get_stream("SELECT version()")
        server_version = tuple(reader.to_rows())[0][0]
        self.is_connected = True
        self.server_version = server_version
        return self.server_version

    def get_response(
        self,
        query: str,
        data: Iterable[bytes] | None = None,
    ) -> HttpResponse:
        """POST *query* (and optional request body *data*) to the server.

        Returns the raw HttpResponse on HTTP 200; on any other status
        decodes the error body, logs it and raises ClickhouseServerError.
        """

        self.params["query"] = query

        response = self.session.post(
            url=self.url,
            params=self.params,
            headers=self.headers,
            timeout=self.timeout,
            data=data,
        )
        status = response.get_status()

        if status != 200:

            # Before the handshake the body is read raw; afterwards the
            # server answers with the negotiated compression, so the
            # body is routed through define_reader first.
            if not self.is_connected:
                error = string_error(response.read())
                response.close()
            else:
                bufferobj = define_reader(response, self.compression_method)
                error = string_error(bufferobj.read(CHUNK_SIZE))
                bufferobj.close()

            self.logger.error(f"ClickhouseServerError: {error}")
            raise ClickhouseServerError(error)

        return response

    def get_stream(
        self,
        query: str,
    ) -> NativeReader:
        """Run *query* and return the decompressed answer as a
        NativeReader stream.

        Raises ClickhouseServerError when the stream is empty or when
        the body starts with a server error text ("Code...").
        """

        stream = self.get_response(query)

        try:
            bufferobj = define_reader(stream, self.compression_method)
            # Peek the first bytes to detect an in-band error message,
            # then rewind so the reader sees the full stream.
            check_error = bufferobj.read(
                self.check_length.get(self.compression_method, 4)
            )[:4]
            bufferobj.seek(0)
        except EOFError:
            # Empty body: synthesize a ClickHouse-style EMPTY_DATA_PASSED
            # message (runtime string, kept verbatim).
            error = (
                "Code: 92. DB::Exception: (EMPTY_DATA_PASSED) "
                f"(version {self.server_version} (official build))"
            )
            self.logger.error(f"ClickhouseServerError: {error}")
            raise ClickhouseServerError(error)

        if check_error == b"Code":
            error = string_error(bufferobj.read(CHUNK_SIZE))
            bufferobj.close()
            self.logger.error(f"ClickhouseServerError: {error}")
            raise ClickhouseServerError(error)

        return NativeReader(bufferobj)

    def upload_data(
        self,
        table: str,
        data: Iterable[bytes],
    ) -> None:
        """Insert *data* (Native-format byte chunks) into *table*."""

        self.get_response(
            query=f"INSERT INTO {table} FORMAT Native",
            data=data,
        )

    def metadata(
        self,
        table: str,
    ) -> list[Column]:
        """Get table metadata as Column objects built from the first
        two DESCRIBE TABLE fields (name, type)."""

        reader = self.get_stream(f"DESCRIBE TABLE {table}")
        return [
            Column(*describe[:2])
            for describe in reader.to_rows()
        ]

    def execute(
        self,
        query: str,
    ) -> None:
        """Simple execute method without return value."""

        self.get_response(query)

    def last_query(self) -> str:
        """Show the most recently sent query."""

        return self.params["query"]

    def refresh(self) -> None:
        """Refresh the session ID (starts a new server-side session)."""

        self.params["session_id"] = str(uuid4())

    def close(self) -> None:
        """Close the HTTPCursor session and mark it disconnected."""

        self.session.close()
        self.is_connected = False
@@ -0,0 +1,6 @@
1
# Read size (bytes) used when draining error bodies from a response stream.
CHUNK_SIZE = 16_384
# Connection defaults for CHConnector; 8123 is the ClickHouse HTTP
# interface port (HTTPCursor uses https only for port 443).
DEFAULT_DATABASE = ""
DEFAULT_USER = "default"
DEFAULT_PASSWORD = ""
DEFAULT_PORT = 8123
# Default HTTP request timeout, in seconds.
DBMS_DEFAULT_TIMEOUT_SEC = 300
@@ -0,0 +1,78 @@
1
+ from collections import OrderedDict
2
+ from typing import NamedTuple
3
+
4
+
5
class DBMetadata(NamedTuple):
    """Describes one side of a transfer for diagram rendering
    (consumed by format_table / transfer_diagram)."""

    # Title shown in the table header (presumably a DBMS/source name).
    name: str
    # Version string appended to the title.
    version: str
    # Ordered mapping: column name -> rendered type string
    # (see make_columns).
    columns: OrderedDict
11
+
12
+
13
def truncate_text(text: str, max_length: int) -> str:
    """Return *text* unchanged when it fits in *max_length* characters;
    otherwise cut it to max_length - 1 characters plus an ellipsis."""

    if len(text) <= max_length:
        return text
    return text[: max_length - 1] + "…"
19
+
20
+
21
def format_table(
    metadata: DBMetadata,
    direction: str,
    table_width: int = 51,
) -> list[str]:
    """Render one metadata table as a list of box-drawing lines.

    The title row is "<direction> [<name> <version>]"; below it comes
    a two-column (name/type) table. Overlong text is hard-cut with a
    trailing ellipsis. Only the title row honours *table_width*; the
    data columns are fixed at 23 visible characters each.
    """

    def clip(text: str, limit: int) -> str:
        # Same contract as truncate_text: cut to limit - 1 chars + "…".
        return text if len(text) <= limit else text[: limit - 1] + "…"

    header = f"{direction} [{metadata.name} {metadata.version}]"
    rows = [
        f"┌{''.ljust(table_width, '─')}┐",
        f"│ {clip(header, table_width - 1).ljust(table_width - 1)}│",
        f"╞{'═' * 25}╤{'═' * 25}╡",
        f"│ {'Column Name'.ljust(23)} │ {'Data Type'.ljust(23)} │",
        f"╞{'═' * 25}╪{'═' * 25}╡",
    ]

    last = len(metadata.columns) - 1
    for index, (col_name, col_type) in enumerate(metadata.columns.items()):
        name_cell = clip(col_name, 23).ljust(23)
        type_cell = clip(str(col_type), 23).ljust(23)
        rows.append(f"│ {name_cell} │ {type_cell} │")
        # Thin separator between rows, but not after the final one.
        if index < last:
            rows.append(f"├{'─' * 25}┼{'─' * 25}┤")

    rows.append(f"└{'─' * 25}┴{'─' * 25}┘")
    return rows
50
+
51
+
52
def transfer_diagram(source: DBMetadata, destination: DBMetadata) -> str:
    """Render source and destination tables side by side, joined by an
    arrow, under a "Transfer data diagram:" caption."""

    left = format_table(source, "Source")
    right = format_table(destination, "Destination")
    height = max(len(left), len(right), 9)

    # Pad the shorter column with blanks as wide as a table (53 chars).
    pad = " " * 53
    left.extend([pad] * (height - len(left)))
    right.extend([pad] * (height - len(right)))

    middle = height // 2
    # Arrow fragments keyed by row index, centered on the middle row.
    arrow_map = {
        middle - 3: " │╲ ",
        middle - 2: " │ ╲ ",
        middle - 1: "┌┘ ╲ ",
        middle: "│ ╲",
        middle + 1: "│ ╱",
        middle + 2: "└┐ ╱ ",
        middle + 3: " │ ╱ ",
        middle + 4: " │╱ ",
    }

    body = "\n".join(
        f"{left[row]} {arrow_map.get(row, ' ')} {right[row]}"
        for row in range(height)
    )
    return "Transfer data diagram:\n" + body
@@ -0,0 +1,18 @@
1
class ClickhouseServerError(ValueError):
    """Error text returned by the ClickHouse server (non-200 response,
    or an error body detected inside a result stream).

    NOTE(review): derives from ValueError, not NativeDumperError, so
    ``except NativeDumperError`` will not catch it — confirm intended.
    """


class NativeDumperError(Exception):
    """NativeDumper base error."""


class NativeDumperReadError(NativeDumperError):
    """NativeDumper read error."""


class NativeDumperWriteError(NativeDumperError):
    """NativeDumper write error."""


class NativeDumperValueError(ValueError):
    """NativeDumper value error.

    NOTE(review): also outside the NativeDumperError hierarchy (plain
    ValueError) — confirm intended before unifying the bases.
    """
@@ -0,0 +1,70 @@
1
+ from datetime import datetime
2
+ from logging import (
3
+ DEBUG,
4
+ FileHandler,
5
+ Formatter,
6
+ Logger,
7
+ StreamHandler,
8
+ )
9
+ from os import makedirs
10
+ from os.path import dirname
11
+ from sys import stdout
12
+
13
+ from ..version import __version__
14
+
15
+
16
def root_dir() -> str:
    """Return the directory of the entry-point script.

    Falls back to "" when ``__main__`` has no ``__file__`` attribute
    (e.g. an interactive interpreter session).
    """

    import __main__

    try:
        main_file = __main__.__file__
    except AttributeError:
        return ""
    return dirname(main_file)
25
+
26
+
27
class DumperLogger(Logger):
    """NativeDumper logger writing to a dated log file and, optionally,
    mirroring records to stdout."""

    def __init__(
        self,
        level: int = DEBUG,
        use_console: bool = True,
    ) -> None:
        """Attach file and (optionally) console handlers.

        Args:
            level: minimum level for the logger and the console handler;
                the file handler always records DEBUG and above.
            use_console: mirror records to stdout when True.
        """

        super().__init__("NativeDumper")

        self.fmt = (
            f"%(asctime)s | %(levelname)-8s | ver {__version__} "
            "| %(funcName)s-%(filename)s-%(lineno)04d <%(message)s>"
        )
        self.setLevel(level)
        # Keep logs next to the entry-point script. When root_dir()
        # returns "" (interactive session) fall back to a relative path
        # instead of "/native_logs", which would try to create a
        # directory at the filesystem root.
        base = root_dir()
        self.log_path = f"{base}/native_logs" if base else "native_logs"
        makedirs(self.log_path, exist_ok=True)

        formatter = Formatter(
            fmt=self.fmt,
            datefmt="%Y-%m-%d %H:%M:%S",
        )

        # One log file per calendar day, named <date>_NativeDumper.log.
        file_handler = FileHandler(
            "{}/{:%Y-%m-%d}_{}.log".format(
                self.log_path,
                datetime.now(),
                self.name,
            ),
            encoding="utf-8",
        )
        file_handler.setLevel(DEBUG)
        file_handler.setFormatter(formatter)
        self.addHandler(file_handler)

        if use_console:
            console_handler = StreamHandler(stdout)
            console_handler.setLevel(level)
            console_handler.setFormatter(formatter)
            self.addHandler(console_handler)

        # Handlers are attached directly; don't bubble records up to
        # the root logger (avoids duplicate output).
        self.propagate = False
@@ -0,0 +1,34 @@
1
+ from re import split
2
+
3
+ def chunk_query(query: str | None) -> tuple[list[str]]:
4
+ """Chunk multiquery to queryes."""
5
+
6
+ if not query:
7
+ return [], []
8
+
9
+ pattern = r";(?=(?:[^']*'[^']*')*[^']*$)"
10
+ parts = [
11
+ part.strip(";").strip()
12
+ for part in split(pattern, query)
13
+ if part.strip(";").strip()
14
+ ]
15
+
16
+ if not parts:
17
+ return [], []
18
+
19
+ first_part: list[str] = []
20
+ second_part: list[str] = []
21
+
22
+ for i, part in enumerate(parts):
23
+ first_part.append(part)
24
+
25
+ if (i + 1 < len(parts) and parts[i + 1].lower().startswith(
26
+ ('with ', 'select ')
27
+ )
28
+ ):
29
+ second_part = parts[i + 1:]
30
+ break
31
+ else:
32
+ second_part = []
33
+
34
+ return first_part, second_part
@@ -0,0 +1,15 @@
1
+ [package]
2
+ name = "pyo3http"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+
6
+ [lib]
7
+ name = "pyo3http"
8
+ crate-type = ["cdylib"]
9
+
10
+ [dependencies]
11
+ pyo3 = { version = "0.26.0", features = ["extension-module"] }
12
+ reqwest = { version = "0.12.24", features = ["stream", "json"] }
13
+ tokio = { version = "1.0", features = ["full"] }
14
+ serde = { version = "1.0", features = ["derive"] }
15
+ serde_json = "1.0"