pgpack-dumper 0.3.0.0__cp314-cp314-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,46 @@
1
+ """Library for read and write PGPack format between PostgreSQL and file."""
2
+
3
+ from pgcopylib import (
4
+ PGCopyReader,
5
+ PGCopyWriter,
6
+ )
7
+ from pgpack import (
8
+ CompressionMethod,
9
+ PGPackReader,
10
+ PGPackWriter,
11
+ )
12
+
13
+ from .common import (
14
+ PGConnector,
15
+ CopyBuffer,
16
+ CopyBufferError,
17
+ CopyBufferObjectError,
18
+ CopyBufferTableNotDefined,
19
+ PGPackDumperError,
20
+ PGPackDumperReadError,
21
+ PGPackDumperWriteError,
22
+ PGPackDumperWriteBetweenError,
23
+ )
24
+ from .dumper import PGPackDumper
25
+ from .version import __version__
26
+
27
+
28
+ __all__ = (
29
+ "__version__",
30
+ "CompressionMethod",
31
+ "CopyBuffer",
32
+ "CopyBufferError",
33
+ "CopyBufferObjectError",
34
+ "CopyBufferTableNotDefined",
35
+ "PGConnector",
36
+ "PGCopyReader",
37
+ "PGCopyWriter",
38
+ "PGPackDumper",
39
+ "PGPackDumperError",
40
+ "PGPackDumperReadError",
41
+ "PGPackDumperWriteError",
42
+ "PGPackDumperWriteBetweenError",
43
+ "PGPackReader",
44
+ "PGPackWriter",
45
+ )
46
+ __author__ = "0xMihalich"
@@ -0,0 +1,58 @@
1
+ """Common functions and classes."""
2
+
3
+ from .columns import make_columns
4
+ from .connector import PGConnector
5
+ from .copy import CopyBuffer
6
+ from .diagram import (
7
+ DBMetadata,
8
+ format_table,
9
+ transfer_diagram,
10
+ )
11
+ from .errors import (
12
+ CopyBufferError,
13
+ CopyBufferObjectError,
14
+ CopyBufferTableNotDefined,
15
+ PGPackDumperError,
16
+ PGPackDumperReadError,
17
+ PGPackDumperWriteError,
18
+ PGPackDumperWriteBetweenError,
19
+ )
20
+ from .logger import DumperLogger
21
+ from .metadata import read_metadata
22
+ from .query import (
23
+ chunk_query,
24
+ query_path,
25
+ query_template,
26
+ random_name,
27
+ search_object,
28
+ )
29
+ from .reader import CopyReader
30
+ from .stream import StreamReader
31
+ from .structs import PGObject
32
+
33
+
34
+ __all__ = (
35
+ "CopyBuffer",
36
+ "CopyBufferError",
37
+ "CopyBufferObjectError",
38
+ "CopyBufferTableNotDefined",
39
+ "CopyReader",
40
+ "DBMetadata",
41
+ "DumperLogger",
42
+ "PGConnector",
43
+ "PGObject",
44
+ "PGPackDumperError",
45
+ "PGPackDumperReadError",
46
+ "PGPackDumperWriteBetweenError",
47
+ "PGPackDumperWriteError",
48
+ "StreamReader",
49
+ "chunk_query",
50
+ "format_table",
51
+ "make_columns",
52
+ "query_path",
53
+ "query_template",
54
+ "random_name",
55
+ "read_metadata",
56
+ "search_object",
57
+ "transfer_diagram",
58
+ )
@@ -0,0 +1,30 @@
1
+ from collections import OrderedDict
2
+
3
+ from pgcopylib import PGOid
4
+ from pgpack.common import PGParam
5
+
6
+
7
def make_columns(
    list_columns: list[str],
    pgtypes: list[PGOid],
    pgparam: list[PGParam],
) -> OrderedDict[str, str]:
    """Make DBMetadata.columns dictionary.

    Pairs every column name with a rendered type string; bpchar and
    numeric types additionally carry their length/precision parameters.
    """

    rendered = OrderedDict()

    for name, pgtype, param in zip(list_columns, pgtypes, pgparam):
        if pgtype is PGOid.bpchar:
            # Fixed-length character type: include its declared length.
            type_text = f"{pgtype.name}({param.length})"
        elif pgtype is PGOid.numeric:
            # Numeric type: include precision and scale.
            type_text = f"{pgtype.name}({param.length}, {param.scale})"
        else:
            type_text = pgtype.name

        rendered[name] = type_text

    return rendered
@@ -0,0 +1,11 @@
1
+ from typing import NamedTuple
2
+
3
+
4
class PGConnector(NamedTuple):
    """Connector for PostgreSQL.

    Plain bundle of connection parameters handed to the database driver
    when opening a connection.
    """

    host: str  # server host name or IP address
    dbname: str  # database name to connect to
    user: str  # role used for authentication
    password: str  # password for the role
    port: int  # TCP port of the server
@@ -0,0 +1,157 @@
1
+ from logging import Logger
2
+ from typing import (
3
+ Generator,
4
+ Iterator,
5
+ )
6
+
7
+ from psycopg import (
8
+ Copy,
9
+ Cursor,
10
+ )
11
+
12
+ from .errors import (
13
+ CopyBufferObjectError,
14
+ CopyBufferTableNotDefined,
15
+ )
16
+ from .query import (
17
+ query_template,
18
+ search_object,
19
+ )
20
+ from .structs import PGObject
21
+ from .metadata import read_metadata
22
+
23
+
24
class CopyBuffer:
    """Stream PGCopy binary data between PostgreSQL and local consumers.

    Wraps a psycopg cursor and exposes COPY TO / COPY FROM helpers for
    either an SQL query or a relation name; progress and errors are
    reported through the supplied logger.
    """

    def __init__(
        self,
        cursor: Cursor,
        logger: Logger,
        query: str | None = None,
        table_name: str | None = None,
    ) -> None:
        """Class initialization.

        Args:
            cursor: open psycopg cursor.
            logger: logger for progress and error messages.
            query: optional SQL query used as the data source.
            table_name: optional relation used as source/destination.
        """

        self.cursor = cursor
        self.logger = logger
        self.query = query
        self.table_name = table_name

    @property
    def metadata(self) -> bytes:
        """Get metadata as bytes for the current query or table."""

        host = self.cursor.connection.info.host
        self.logger.info(f"Start read metadata from host {host}.")
        metadata = read_metadata(
            self.cursor,
            self.query,
            self.table_name,
        )
        self.logger.info(f"Read metadata from host {host} done.")
        return metadata

    def copy_to(self) -> Iterator[Copy]:
        """Get copy object from PostgreSQL.

        Raises:
            CopyBufferTableNotDefined: neither query nor table is set.
            CopyBufferObjectError: the relation kind is not readable.
        """

        if not self.query and not self.table_name:
            error_msg = "Query or table not defined."
            self.logger.error(f"CopyBufferTableNotDefined: {error_msg}")
            raise CopyBufferTableNotDefined(error_msg)

        host = self.cursor.connection.info.host

        if not self.query:
            self.logger.info(f"Start read from {host}.{self.table_name}.")
            self.cursor.execute(query_template("relkind").format(
                table_name=self.table_name,
            ))
            relkind = self.cursor.fetchone()[0]
            pg_object = PGObject[relkind]
            if not pg_object.is_readable:
                error_msg = f"Read from {pg_object} not support."
                self.logger.error(f"CopyBufferObjectError: {error_msg}")
                raise CopyBufferObjectError(error_msg)
            self.logger.info(f"Use method read from {pg_object}.")
            if not pg_object.is_readobject:
                # Relations that COPY cannot read directly are wrapped
                # into a subquery.
                self.table_name = f"(select * from {self.table_name})"
        elif self.query:
            self.logger.info(f"Start read query from {host}.")
            self.logger.info("Use method read from select.")
            self.table_name = f"({self.query})"

        return self.cursor.copy(
            query_template("copy_to").format(table_name=self.table_name)
        )

    def copy_from(
        self,
        copyobj: Iterator[bytes],
    ) -> None:
        """Write PGCopy dump into PostgreSQL.

        Args:
            copyobj: iterator yielding raw PGCopy byte chunks.

        Raises:
            CopyBufferTableNotDefined: destination table is not set.
        """

        if not self.table_name:
            error_msg = "Table not defined."
            self.logger.error(f"CopyBufferTableNotDefined: {error_msg}")
            raise CopyBufferTableNotDefined(error_msg)

        host = self.cursor.connection.info.host
        size = 0
        self.logger.info(f"Start write into {host}.{self.table_name}.")

        with self.cursor.copy(
            query_template("copy_from").format(table_name=self.table_name)
        ) as cp:
            for bytes_data in copyobj:
                size += len(bytes_data)
                cp.write(bytes_data)

        self.logger.info(f"Successfully sending {size} bytes.")
        self.logger.info(f"Write into {host}.{self.table_name} done.")

    def copy_between(
        self,
        copy_buffer: "CopyBuffer",
    ) -> None:
        """Write from PostgreSQL into PostgreSQL.

        Args:
            copy_buffer: source CopyBuffer to read the dump from;
                self is the destination.
        """

        with copy_buffer.copy_to() as copy_to:
            destination_host = self.cursor.connection.info.host
            source_host = copy_buffer.cursor.connection.info.host
            source_object = search_object(
                copy_buffer.table_name,
                copy_buffer.query,
            )
            size = 0
            self.logger.info(
                f"Copy {source_object} from {source_host} into "
                f"{destination_host}.{self.table_name} started."
            )

            with self.cursor.copy(
                query_template("copy_from").format(table_name=self.table_name)
            ) as copy_from:
                for data in copy_to:
                    size += len(data)
                    copy_from.write(data)

            self.logger.info(f"Successfully sending {size} bytes.")
            # BUGFIX: original message had no space between the source
            # host and "into" ("...{source_host}into...").
            self.logger.info(
                f"Copy {source_object} from {source_host} "
                f"into {destination_host}.{self.table_name} done."
            )

    def copy_reader(self) -> Generator[bytes, None, None]:
        """Read bytes from copy object, yielding raw chunks."""

        host = self.cursor.connection.info.host
        source = search_object(
            self.table_name,
            self.query,
        )

        with self.copy_to() as copy_object:
            for data in copy_object:
                yield bytes(data)

        self.logger.info(f"Read {source} from {host} done.")
@@ -0,0 +1,78 @@
1
+ from collections import OrderedDict
2
+ from typing import NamedTuple
3
+
4
+
5
class DBMetadata(NamedTuple):
    """Database object.

    Describes one side of a transfer for rendering in the diagram.
    """

    name: str  # database flavor/name shown in the table header
    version: str  # server version string shown next to the name
    columns: OrderedDict  # column name -> rendered column type text
11
+
12
+
13
def truncate_text(text: str, max_length: int) -> str:
    """Truncate text and add ellipsis if too long."""

    if len(text) <= max_length:
        return text
    # Reserve one cell for the ellipsis character.
    return f"{text[:max_length - 1]}…"
19
+
20
+
21
def format_table(
    metadata: DBMetadata,
    direction: str,
    table_width: int = 51,
) -> list[str]:
    """Format single table as list of lines."""

    inner_width = table_width - 1
    title = truncate_text(
        f"{direction} [{metadata.name} {metadata.version}]",
        inner_width,
    )

    # Header: outer frame, title row, then the two-column heading.
    lines = [
        f"┌{'─' * table_width}┐",
        f"│ {title.ljust(inner_width)}│",
        f"╞{'═' * 25}╤{'═' * 25}╡",
        f"│ {'Column Name'.ljust(23)} │ {'Data Type'.ljust(23)} │",
        f"╞{'═' * 25}╪{'═' * 25}╡",
    ]

    last = len(metadata.columns) - 1
    for index, (column, column_type) in enumerate(metadata.columns.items()):
        left = truncate_text(column, 23).ljust(23)
        right = truncate_text(str(column_type), 23).ljust(23)
        lines.append(f"│ {left} │ {right} │")
        # Thin separator between rows, but not after the last one.
        if index < last:
            lines.append(f"├{'─' * 25}┼{'─' * 25}┤")

    lines.append(f"└{'─' * 25}┴{'─' * 25}┘")
    return lines
50
+
51
+
52
def transfer_diagram(source: DBMetadata, destination: DBMetadata) -> str:
    """Make transfer diagram with two tables and arrow.

    Renders the source table on the left, the destination on the right,
    and an ASCII-art arrow between them, vertically centered.
    """

    src_lines = format_table(source, "Source")
    dest_lines = format_table(destination, "Destination")
    # At least 9 rows so the 8-row arrow always fits.
    max_lines = max(len(src_lines), len(dest_lines), 9)

    # Pad the shorter table with blank lines (53 = table width + frame).
    src_lines.extend([" " * 53] * (max_lines - len(src_lines)))
    dest_lines.extend([" " * 53] * (max_lines - len(dest_lines)))

    middle_line = max_lines // 2
    # Arrow rows keyed by their vertical position around the middle.
    arrow_config = [
        (middle_line - 3, " │╲  "),
        (middle_line - 2, " │ ╲ "),
        (middle_line - 1, "┌┘  ╲ "),
        (middle_line, "│    ╲"),
        (middle_line + 1, "│    ╱"),
        (middle_line + 2, "└┐  ╱ "),
        (middle_line + 3, " │ ╱ "),
        (middle_line + 4, " │╱  "),
    ]
    arrow_map = {line: arrow for line, arrow in arrow_config}

    # Rows without an arrow segment get blank padding instead.
    return "Transfer data diagram:\n" + "\n".join(
        f"{src_lines[row]} {arrow_map.get(row, '      ')} {dest_lines[row]}"
        for row in range(max_lines)
    )
@@ -0,0 +1,26 @@
1
class CopyBufferError(Exception):
    """CopyBuffer base error."""


# BUGFIX: the CopyBuffer-specific errors previously inherited only from
# TypeError/ValueError, so `except CopyBufferError` could never catch
# them despite CopyBufferError being declared the base error. They now
# inherit the base as well, while keeping TypeError/ValueError so any
# existing handlers remain backward compatible.
class CopyBufferObjectError(CopyBufferError, TypeError):
    """Destination object not support."""


class CopyBufferTableNotDefined(CopyBufferError, ValueError):
    """Destination table not defined."""


class PGPackDumperError(Exception):
    """PGPackDumper base error."""


class PGPackDumperReadError(PGPackDumperError):
    """PGPackDumper read error."""


class PGPackDumperWriteError(PGPackDumperError):
    """PGPackDumper write error."""


class PGPackDumperWriteBetweenError(PGPackDumperWriteError):
    """PGPackDumper write between error."""
@@ -0,0 +1,70 @@
1
+ from datetime import datetime
2
+ from logging import (
3
+ DEBUG,
4
+ FileHandler,
5
+ Formatter,
6
+ Logger,
7
+ StreamHandler,
8
+ )
9
+ from os import makedirs
10
+ from os.path import dirname
11
+ from sys import stdout
12
+
13
+ from ..version import __version__
14
+
15
+
16
def root_dir() -> str:
    """Get project directory.

    Returns the directory of the running script, or an empty string
    when __main__ has no __file__ (e.g. interactive session).
    """

    import __main__

    main_file = getattr(__main__, "__file__", None)
    if main_file is None:
        return ""
    return dirname(main_file)
25
+
26
+
27
class DumperLogger(Logger):
    """PGPackDumper logger.

    Logs to a per-day file under <project>/pgpack_logs and, optionally,
    to stdout.
    """

    def __init__(
        self,
        level: int = DEBUG,
        use_console: bool = True,
    ) -> None:
        """Class initialize.

        Args:
            level: level for the logger itself and the console handler.
            use_console: attach a stdout handler when True.
        """

        super().__init__("PGPackDumper")

        self.fmt = (
            f"%(asctime)s | %(levelname)-8s | ver {__version__} "
            "| %(funcName)s-%(filename)s-%(lineno)04d <%(message)s>"
        )
        self.setLevel(level)
        self.log_path = f"{root_dir()}/pgpack_logs"
        makedirs(self.log_path, exist_ok=True)

        log_formatter = Formatter(
            fmt=self.fmt,
            datefmt="%Y-%m-%d %H:%M:%S",
        )
        log_file = (
            f"{self.log_path}/{datetime.now():%Y-%m-%d}_{self.name}.log"
        )

        # File handler always records everything, regardless of `level`.
        to_file = FileHandler(log_file, encoding="utf-8")
        to_file.setLevel(DEBUG)
        to_file.setFormatter(log_formatter)
        self.addHandler(to_file)

        if use_console:
            to_console = StreamHandler(stdout)
            to_console.setLevel(level)
            to_console.setFormatter(log_formatter)
            self.addHandler(to_console)

        # Keep records out of the root logger.
        self.propagate = False
@@ -0,0 +1,38 @@
1
+ from psycopg import Cursor
2
+
3
+ from .query import (
4
+ query_template,
5
+ random_name,
6
+ )
7
+
8
+
9
def read_metadata(
    cursor: Cursor,
    query: str | None = None,
    table_name: str | None = None,
) -> bytes:
    """Read metadata for query or table.

    Args:
        cursor: open psycopg cursor.
        query: when given, metadata is taken from a temporary table
            created from the prepared query (with null parameters).
        table_name: relation whose attribute metadata is read.

    Returns:
        Metadata payload as bytes.

    Raises:
        ValueError: when neither query nor table_name is provided.
    """

    if not query and not table_name:
        # BUGFIX: previously raised a bare ValueError() with no message.
        raise ValueError("Query or table name must be defined.")

    uses_temp_table = bool(query)

    if query:
        session_name = random_name()
        prepare_name = f"{session_name}_prepare"
        table_name = f"{session_name}_temp"
        cursor.execute(query_template("prepare").format(
            prepare_name=prepare_name,
            query=query,
            table_name=table_name,
        ))

    try:
        cursor.execute(query_template("attributes").format(
            table_name=table_name,
        ))
        metadata: bytes = cursor.fetchone()[0]
    finally:
        # BUGFIX: drop the temporary table even when reading the
        # attributes fails, so failed calls do not leak temp tables.
        if uses_temp_table:
            cursor.execute(f"drop table if exists {table_name};")

    return metadata
@@ -0,0 +1,62 @@
1
+ from pathlib import Path
2
+ from random import randbytes
3
+ from re import match
4
+
5
+
6
# Matches either a "(select * from X)" wrapper (group 1 = X) or any
# plain object name (group 2).
pattern = r"\(select \* from (.*)\)|(.*)"


def search_object(table: str, query: str = "") -> str:
    """Return current string for object.

    A non-empty query always reads as "query"; otherwise an unwrapped
    table name is extracted from a "(select * from ...)" wrapper when
    present.
    """

    if query:
        return "query"

    unwrapped = match(pattern, table).group(1)
    return unwrapped if unwrapped else table
16
+
17
+
18
def random_name() -> str:
    """Generate random name for prepare and temp table."""

    # Not security-sensitive: only needs collision-unlikely uniqueness.
    suffix = randbytes(8).hex()  # noqa: S311
    return "session_" + suffix
22
+
23
+
24
def query_path() -> str:
    """Path for queryes.

    Returns a template path with a "{}" placeholder for the query name.
    """

    base = Path(__file__).parent.absolute()
    return f"{base}/queryes/{{}}.sql"
28
+
29
+
30
def query_template(query_name: str) -> str:
    """Get query template for his name.

    Reads the SQL text stored under queryes/<query_name>.sql.
    """

    template_path = query_path().format(query_name)

    with open(template_path, encoding="utf-8") as template_file:
        return template_file.read()
37
+
38
+
39
+ def chunk_query(query: str | None) -> tuple[list[str]]:
40
+ """Chunk multiquery to queryes."""
41
+
42
+ if not query:
43
+ return [], []
44
+
45
+ first_part: list[str] = [
46
+ part.strip()
47
+ for part in query.split(";")
48
+ ]
49
+ second_part: list[str] = []
50
+
51
+ for _ in first_part:
52
+ second_part.append(first_part.pop())
53
+ if any(
54
+ word == second_part[-1][:len(word)].lower()
55
+ for word in ("with", "select")
56
+ ):
57
+ second_part = list(reversed(second_part))
58
+ break
59
+
60
+ return first_part, second_part
61
+
62
+
@@ -0,0 +1,4 @@
1
+ select json_agg(json_build_array(attnum, json_build_array(attname, atttypid::int4,
2
+ case when atttypid = 1042 then atttypmod - 4 when atttypid = 1700 then (atttypmod - 4) >> 16 else attlen end,
3
+ case when atttypid = 1700 then (atttypmod - 4) & 65535 else 0 end, attndims)))::text::bytea as metadata
4
+ from pg_attribute where attrelid = '{table_name}'::regclass and attnum > 0 and not attisdropped;
@@ -0,0 +1 @@
1
+ copy {table_name} from stdin with (format binary);
@@ -0,0 +1 @@
1
+ copy {table_name} to stdout with (format binary);
@@ -0,0 +1,2 @@
1
+ select case when count(nspname) = 1 then 'greenplum' else 'postgres' end as dbname
2
+ from pg_catalog.pg_namespace where nspname = 'gp_toolkit';
@@ -0,0 +1 @@
1
+ select substring(version() from 'Greenplum Database (.*?) build') as gp_version;
@@ -0,0 +1,4 @@
1
+ prepare {prepare_name} as {query} limit 0;
2
+ drop table if exists {table_name};
3
+ create temporary table {table_name} as execute {prepare_name} (null);
4
+ deallocate prepare {prepare_name};
@@ -0,0 +1 @@
1
+ select relkind from pg_class where oid = '{table_name}'::regclass;
@@ -0,0 +1,11 @@
1
# Cython declarations for the CopyReader extension type; the Python
# signatures are mirrored in the pure-Python stub module.
cdef class CopyReader:

    cdef object copyobj    # source iterable of Copy objects
    cdef object iterator   # iterator over copyobj — presumably yields bytearray chunks; confirm in the implementation
    cdef object bufferobj  # internal buffer of not-yet-consumed bytes
    cdef bint closed       # True once close() has been called
    cdef long long total_read  # running count reported by tell()

    cpdef bytes read(self, long long size)
    cpdef long long tell(self)
    cpdef void close(self)
@@ -0,0 +1,35 @@
1
+ from typing import Iterator, Iterable
2
+
3
+ from psycopg import Copy
4
+
5
+
6
class CopyReader:
    """Read from iterable Copy object.

    NOTE(review): bodies are `...` placeholders — this appears to be a
    typing stub for the compiled Cython extension declared in the
    accompanying .pxd file; confirm against the built package.
    """

    def __init__(
        self,
        copyobj: Iterable[Copy],
    ) -> None:
        """Class initialization."""

        # Attribute declarations only (no assignments) — the compiled
        # implementation provides the actual state.
        self.copyobj: Iterable[Copy]
        self.iterator: Iterator[bytearray]
        self.bufferobj: bytearray
        self.closed: bool
        self.total_read: int
        ...

    def read(self, size: int) -> bytes:
        """Read from copy."""

        ...

    def tell(self) -> int:
        """Return the current stream position."""

        ...

    def close(self) -> None:
        """Close CopyReader."""

        ...
+ ...