pgpack-dumper 0.3.3.3__cp311-cp311-macosx_10_14_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,67 @@
+ cdef class CopyReader:
+     """Read from an iterable Copy object."""
+
+     def __init__(
+         self,
+         object copyobj,
+     ):
+         """Class initialization."""
+
+         self.copyobj = copyobj
+         self.iterator = iter(self.copyobj.__enter__())
+         self.bufferobj = bytearray()
+         self.closed = False
+         self.total_read = 0
+
+     cpdef bytes read(self, long long size):
+         """Read at most size bytes from the copy stream."""
+
+         if self.closed:
+             raise RuntimeError("Copy object already closed.")
+
+         if size <= 0:
+             return b""
+
+         cdef object chunk
+         cdef Py_ssize_t buffer_len
+         cdef bytes result
+
+         try:
+             # Accumulate chunks until the buffer can satisfy the request.
+             while len(self.bufferobj) < size:
+                 chunk = next(self.iterator)
+                 self.bufferobj.extend(chunk)
+
+             result = bytes(self.bufferobj[:size])
+             del self.bufferobj[:size]
+             self.total_read += len(result)
+             return result
+
+         except StopIteration:
+             # Source exhausted: close the copy and drain what is buffered.
+             self.close()
+             buffer_len = len(self.bufferobj)
+
+             if buffer_len > 0:
+                 if size >= buffer_len:
+                     result = bytes(self.bufferobj)
+                     self.bufferobj = bytearray()
+                 else:
+                     result = bytes(self.bufferobj[:size])
+                     del self.bufferobj[:size]
+                 self.total_read += len(result)
+                 return result
+             return b""
+
+     cpdef long long tell(self):
+         """Return the current stream position."""
+
+         if self.closed:
+             raise RuntimeError("Copy object already closed.")
+
+         return self.total_read
+
+     cpdef void close(self):
+         """Close CopyReader."""
+
+         if not self.closed:
+             self.copyobj.__exit__(None, None, None)
+             self.closed = True
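
Note: the sketch below is not part of the package diff; it shows how CopyReader is meant to wrap psycopg's COPY context manager into a file-like reader. The import path, connection string, and table name are hypothetical.

    from psycopg import Connection

    from pgpack_dumper.common.reader import CopyReader  # hypothetical path

    connect = Connection.connect("dbname=demo")  # hypothetical conninfo
    cursor = connect.cursor()

    # cursor.copy() returns an unentered Copy context manager;
    # CopyReader calls __enter__/__exit__ on it itself.
    copyobj = cursor.copy("COPY demo_table TO STDOUT (FORMAT binary)")
    reader = CopyReader(copyobj)

    header = reader.read(19)  # pgcopy signature + flags + header extension
    print(reader.tell())      # 19
    reader.close()            # exits the Copy context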
@@ -0,0 +1,80 @@
+ from typing import (
+     Iterable,
+     NoReturn,
+ )
+
+ from pgcopylib import PGCopyReader
+ from pgpack import (
+     PGPackReader,
+     metadata_reader,
+ )
+ from psycopg import Copy
+
+ from .reader import CopyReader
+
+
+ class StreamReader(PGPackReader):
+     """Class for streaming reads from PostgreSQL/GreenPlum."""
+
+     def __init__(
+         self,
+         metadata: bytes,
+         copyobj: Iterable[Copy],
+     ) -> None:
+         """Class initialization."""
+
+         self.metadata = metadata
+         self.copyobj = CopyReader(copyobj)
+         (
+             self.columns,
+             self.pgtypes,
+             self.pgparam,
+         ) = metadata_reader(self.metadata)
+         self.pgcopy = PGCopyReader(
+             self.copyobj,
+             self.pgtypes,
+         )
+
+     def __str__(self) -> str:
+         """String representation of StreamReader."""
+
+         def to_col(text: str) -> str:
+             """Format a string element into a fixed-width column."""
+
+             text = text[:14] + "…" if len(text) > 15 else text
+             return f" {text: <15} "
+
+         empty_line = (
+             "│-----------------+-----------------│"
+         )
+         end_line = (
+             "└─────────────────┴─────────────────┘"
+         )
+         _str = [
+             "<PostgreSQL/GreenPlum stream reader>",
+             "┌─────────────────┬─────────────────┐",
+             "│ Column Name     │ PostgreSQL Type │",
+             "╞═════════════════╪═════════════════╡",
+         ]
+
+         for column, pgtype in zip(self.columns, self.pgtypes):
+             _str.append(
+                 f"│{to_col(column)}│{to_col(pgtype.name)}│",
+             )
+             _str.append(empty_line)
+
+         _str[-1] = end_line
+         return "\n".join(_str) + f"""
+ Total columns: {len(self.columns)}
+ Read rows: {self.pgcopy.num_rows}
+ """
+
+     def to_bytes(self) -> NoReturn:
+         """Get raw unpacked pgcopy data."""
+
+         raise NotImplementedError("Not supported in stream mode.")
+
+     def close(self) -> None:
+         """Close stream object."""
+
+         self.copyobj.close()
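
Note: StreamReader is normally constructed by PGPackDumper.to_reader (see the last hunk below) rather than directly. A minimal sketch, assuming metadata bytes and a Copy iterable are already at hand, and assuming PGPackReader exposes a to_rows() row generator (implied by its use in write_between below); process() is a hypothetical callback:

    stream = StreamReader(metadata, copyobj)
    print(stream)                 # column/type table plus row counter
    for row in stream.to_rows():  # assumed PGPackReader API
        process(row)
    stream.close()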
@@ -0,0 +1,34 @@
+ from enum import Enum
+ from typing import NamedTuple
+
+
+ class RelClass(NamedTuple):
+     """Postgres objects."""
+
+     rel_name: str
+     is_readobject: bool
+     is_readable: bool
+
+
+ class PGObject(RelClass, Enum):
+     """RelClass object from relkind value."""
+
+     r = RelClass("Relation table", True, True)
+     i = RelClass("Index", False, False)
+     S = RelClass("Sequence", False, False)
+     t = RelClass("Toast table", False, False)
+     v = RelClass("View", False, True)
+     m = RelClass("Materialized view", False, True)
+     c = RelClass("Composite type", False, False)
+     f = RelClass("Foreign table", False, True)
+     p = RelClass("Partitioned table", True, True)
+     I = RelClass("Partitioned index", False, True)  # noqa: E741
+     u = RelClass("Temporary table", True, True)
+     o = RelClass("Optimized files", False, False)
+     b = RelClass("Block directory", False, False)
+     M = RelClass("Visibility map", False, False)
+
+     def __str__(self) -> str:
+         """String representation of the class."""
+
+         return self.rel_name
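
Note: because PGObject mixes RelClass into an Enum, a pg_class.relkind letter maps straight to its description; a minimal sketch (not from the package):

    relkind = "r"                      # as returned from pg_class.relkind
    obj = PGObject[relkind]            # enum lookup by member name
    print(obj)                         # "Relation table" (via __str__)
    print(obj.is_readobject)           # True: dumpable as a whole object
    print(PGObject["v"].is_readable)   # True: views are readable via query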
@@ -0,0 +1,465 @@
+ from collections import OrderedDict
+ from collections.abc import Generator
+ from gc import collect
+ from io import (
+     BufferedReader,
+     BufferedWriter,
+ )
+ from logging import Logger
+ from types import MethodType
+ from typing import (
+     Any,
+     Iterable,
+     Iterator,
+     Union,
+ )
+
+ from pgcopylib import PGCopyWriter
+ from pgpack import (
+     CompressionMethod,
+     PGPackReader,
+     PGPackWriter,
+     metadata_reader,
+ )
+ from psycopg import (
+     Connection,
+     Copy,
+     Cursor,
+ )
+ from pandas import DataFrame as PdFrame
+ from polars import DataFrame as PlFrame
+ from sqlparse import format as sql_format
+
+ from .common import (
+     CopyBuffer,
+     DBMetadata,
+     DumperLogger,
+     PGConnector,
+     PGPackDumperError,
+     PGPackDumperReadError,
+     PGPackDumperWriteError,
+     PGPackDumperWriteBetweenError,
+     StreamReader,
+     chunk_query,
+     make_columns,
+     query_template,
+     transfer_diagram,
+ )
+
+
+ class PGPackDumper:
+     """Class for reading and writing the PGPack format."""
+
+     def __init__(
+         self,
+         connector: PGConnector,
+         compression_method: CompressionMethod = CompressionMethod.ZSTD,
+         logger: Logger | None = None,
+     ) -> None:
+         """Class initialization."""
+
+         if not logger:
+             logger = DumperLogger()
+
+         try:
+             self.connector: PGConnector = connector
+             self.compression_method: CompressionMethod = compression_method
+             self.logger = logger
+             self.connect: Connection = Connection.connect(
+                 **self.connector._asdict()
+             )
+             self.cursor: Cursor = self.connect.cursor()
+             self.copy_buffer: CopyBuffer = CopyBuffer(self.cursor, self.logger)
+             self._dbmeta: DBMetadata | None = None
+             self._size = 0
+         except Exception as error:
+             self.logger.error(f"{error.__class__.__name__}: {error}")
+             raise PGPackDumperError(error)
+
+         self.cursor.execute(query_template("dbname"))
+         self.dbname = self.cursor.fetchone()[0]
+         self.version = (
+             f"{self.connect.info.server_version // 10000}."
+             f"{self.connect.info.server_version % 1000}"
+         )
+
+         if self.dbname == "greenplum":
+             self.cursor.execute(query_template("gpversion"))
+             gpversion = self.cursor.fetchone()[0]
+             self.version = f"{self.version} gp {gpversion}"
+
+         self.logger.info(
+             f"PGPackDumper initialized for host {self.connector.host} "
+             f"[version {self.version}]"
+         )
+
+     @staticmethod
+     def multiquery(dump_method: MethodType):
+         """Multiquery decorator."""
+
+         def wrapper(*args, **kwargs):
+
+             first_part: list[str]
+             second_part: list[str]
+
+             self: PGPackDumper = args[0]
+             cursor: Cursor = (kwargs.get("dumper_src") or self).cursor
+             query: str = kwargs.get("query_src") or kwargs.get("query")
+             part: int = 1
+             # Split the SQL into statements to run before and after
+             # the streaming statement.
+             first_part, second_part = chunk_query(self.query_formatter(query))
+             total_parts = len(sum((first_part, second_part), [])) + int(
+                 bool(kwargs.get("table_name") or kwargs.get("table_src"))
+             )
+
+             if len(first_part) > 1:
+                 for query in first_part:
+                     self.logger.info(f"Execute query {part}/{total_parts}")
+                     cursor.execute(query)
+                     part += 1
+
+             if second_part:
+                 for key in ("query", "query_src"):
+                     if key in kwargs:
+                         kwargs[key] = second_part.pop(0)
+                         break
+
+             self.logger.info(
+                 f"Execute stream {part}/{total_parts} [pgcopy mode]"
+             )
+             output = dump_method(*args, **kwargs)
+
+             if second_part:
+                 for query in second_part:
+                     part += 1
+                     self.logger.info(f"Execute query {part}/{total_parts}")
+                     cursor.execute(query)
+
+             if output:
+                 self.refresh()
+
+             collect()
+             return output
+
+         return wrapper
+
+     def query_formatter(self, query: str) -> str | None:
+         """Reformat a query."""
+
+         if not query:
+             return
+         return sql_format(sql=query, strip_comments=True).strip().strip(";")
+
+     @multiquery
+     def __read_dump(
+         self,
+         fileobj: BufferedWriter,
+         query: str | None,
+         table_name: str | None,
+     ) -> bool:
+         """Internal read_dump; generates kwargs for the decorator."""
+
+         def __read_data(
+             copy_to: Iterator[Copy],
+         ) -> Generator[bytes, None, None]:
+             """Generate bytes from the copy object while tracking size."""
+
+             self._size = 0
+
+             for data in copy_to:
+                 chunk = bytes(data)
+                 self._size += len(chunk)
+                 yield chunk
+
+         try:
+             self.copy_buffer.query = query
+             self.copy_buffer.table_name = table_name
+             metadata = self.copy_buffer.metadata
+             pgpack = PGPackWriter(
+                 fileobj,
+                 metadata,
+                 self.compression_method,
+             )
+             columns = make_columns(*metadata_reader(metadata))
+             source = DBMetadata(
+                 name=self.dbname,
+                 version=self.version,
+                 columns=columns,
+             )
+             destination = DBMetadata(
+                 name="file",
+                 version=fileobj.name,
+                 columns=columns,
+             )
+             self.logger.info(transfer_diagram(source, destination))
+
+             with self.copy_buffer.copy_to() as copy_to:
+                 pgpack.from_bytes(__read_data(copy_to))
+
+             pgpack.close()
+             self.logger.info(f"Successfully read {self._size} bytes.")
+             self.logger.info(
+                 f"Read pgpack dump from {self.connector.host} done."
+             )
+             return True
+         except Exception as error:
+             self.logger.error(f"{error.__class__.__name__}: {error}")
+             raise PGPackDumperReadError(error)
+
+     @multiquery
+     def __write_between(
+         self,
+         table_dest: str,
+         table_src: str | None,
+         query_src: str | None,
+         dumper_src: Union["PGPackDumper", object],
+     ) -> bool:
+         """Internal write_between; generates kwargs for the decorator."""
+
+         try:
+             if not dumper_src:
+                 connect = Connection.connect(**self.connector._asdict())
+                 self.logger.info(
+                     f"Set new connection for host {self.connector.host}."
+                 )
+                 source_copy_buffer = CopyBuffer(
+                     connect.cursor(),
+                     self.logger,
+                     query_src,
+                     table_src,
+                 )
+                 src_dbname = self.dbname
+                 src_version = self.version
+             elif dumper_src.__class__ is PGPackDumper:
+                 source_copy_buffer = dumper_src.copy_buffer
+                 source_copy_buffer.table_name = table_src
+                 source_copy_buffer.query = query_src
+                 src_dbname = dumper_src.dbname
+                 src_version = dumper_src.version
+             else:
+                 # Foreign dumper type: stream rows through its reader.
+                 reader = dumper_src.to_reader(
+                     query=query_src,
+                     table_name=table_src,
+                 )
+                 dtype_data = reader.to_rows()
+                 self.from_rows(
+                     dtype_data=dtype_data,
+                     table_name=table_dest,
+                     source=dumper_src._dbmeta,
+                 )
+                 size = reader.tell()
+                 self.logger.info(f"Successfully sent {size} bytes.")
+                 return reader.close()
+
+             self.copy_buffer.table_name = table_dest
+             self.copy_buffer.query = None
+             source = DBMetadata(
+                 name=src_dbname,
+                 version=src_version,
+                 columns=make_columns(
+                     *metadata_reader(source_copy_buffer.metadata),
+                 ),
+             )
+             destination = DBMetadata(
+                 name=self.dbname,
+                 version=self.version,
+                 columns=make_columns(
+                     *metadata_reader(self.copy_buffer.metadata),
+                 ),
+             )
+             self.logger.info(transfer_diagram(source, destination))
+             self.copy_buffer.copy_between(source_copy_buffer)
+             self.connect.commit()
+             return True
+         except Exception as error:
+             self.logger.error(f"{error.__class__.__name__}: {error}")
+             raise PGPackDumperWriteBetweenError(error)
+
+     @multiquery
+     def __to_reader(
+         self,
+         query: str | None,
+         table_name: str | None,
+     ) -> StreamReader:
+         """Internal to_reader; generates kwargs for the decorator."""
+
+         self.copy_buffer.query = query
+         self.copy_buffer.table_name = table_name
+         metadata = self.copy_buffer.metadata
+         self._dbmeta = DBMetadata(
+             name=self.dbname,
+             version=self.version,
+             columns=make_columns(
+                 *metadata_reader(metadata),
+             ),
+         )
+         return StreamReader(
+             metadata,
+             self.copy_buffer.copy_to(),
+         )
+
+     def read_dump(
+         self,
+         fileobj: BufferedWriter,
+         query: str | None = None,
+         table_name: str | None = None,
+     ) -> bool:
+         """Read a PGPack dump from PostgreSQL/GreenPlum."""
+
+         return self.__read_dump(
+             fileobj=fileobj,
+             query=query,
+             table_name=table_name,
+         )
+
+     def write_dump(
+         self,
+         fileobj: BufferedReader,
+         table_name: str,
+     ) -> None:
+         """Write a PGPack dump into PostgreSQL/GreenPlum."""
+
+         try:
+             self.copy_buffer.table_name = table_name
+             pgpack = PGPackReader(fileobj)
+             source = DBMetadata(
+                 name="file",
+                 version=fileobj.name,
+                 columns=make_columns(
+                     pgpack.columns,
+                     pgpack.pgtypes,
+                     pgpack.pgparam,
+                 ),
+             )
+             destination = DBMetadata(
+                 name=self.dbname,
+                 version=self.version,
+                 columns=make_columns(
+                     *metadata_reader(self.copy_buffer.metadata),
+                 ),
+             )
+             self.logger.info(transfer_diagram(source, destination))
+             collect()
+             self.copy_buffer.copy_from(pgpack.to_bytes())
+             self.connect.commit()
+             pgpack.close()
+             self.refresh()
+         except Exception as error:
+             self.logger.error(f"{error.__class__.__name__}: {error}")
+             raise PGPackDumperWriteError(error)
+
+     def write_between(
+         self,
+         table_dest: str,
+         table_src: str | None = None,
+         query_src: str | None = None,
+         dumper_src: Union["PGPackDumper", object] = None,
+     ) -> bool:
+         """Write from PostgreSQL/GreenPlum into PostgreSQL/GreenPlum."""
+
+         return self.__write_between(
+             table_dest=table_dest,
+             table_src=table_src,
+             query_src=query_src,
+             dumper_src=dumper_src,
+         )
+
+     def to_reader(
+         self,
+         query: str | None = None,
+         table_name: str | None = None,
+     ) -> StreamReader:
+         """Get a stream from PostgreSQL/GreenPlum as a StreamReader object."""
+
+         return self.__to_reader(
+             query=query,
+             table_name=table_name,
+         )
+
+     def from_rows(
+         self,
+         dtype_data: Iterable[Any],
+         table_name: str,
+         source: DBMetadata | None = None,
+     ) -> None:
+         """Write from a Python iterable object
+         into a PostgreSQL/GreenPlum table."""
+
+         if not source:
+             source = DBMetadata(
+                 name="python",
+                 version="iterable object",
+                 columns={"Unknown": "Unknown"},
+             )
+
+         self.copy_buffer.table_name = table_name
+         columns, pgtypes, pgparam = metadata_reader(self.copy_buffer.metadata)
+         writer = PGCopyWriter(None, pgtypes)
+         destination = DBMetadata(
+             name=self.dbname,
+             version=self.version,
+             columns=make_columns(
+                 list_columns=columns,
+                 pgtypes=pgtypes,
+                 pgparam=pgparam,
+             ),
+         )
+         self.logger.info(transfer_diagram(source, destination))
+         collect()
+         self.copy_buffer.copy_from(writer.from_rows(dtype_data))
+         self.connect.commit()
+         self.refresh()
+
+     def from_pandas(
+         self,
+         data_frame: PdFrame,
+         table_name: str,
+     ) -> None:
+         """Write from a pandas.DataFrame into a PostgreSQL/GreenPlum table."""
+
+         self.from_rows(
+             dtype_data=iter(data_frame.values),
+             table_name=table_name,
+             source=DBMetadata(
+                 name="pandas",
+                 version="DataFrame",
+                 columns=OrderedDict(zip(
+                     data_frame.columns,
+                     [str(dtype) for dtype in data_frame.dtypes],
+                 )),
+             )
+         )
+
+     def from_polars(
+         self,
+         data_frame: PlFrame,
+         table_name: str,
+     ) -> None:
+         """Write from a polars.DataFrame into a PostgreSQL/GreenPlum table."""
+
+         self.from_rows(
+             dtype_data=data_frame.iter_rows(),
+             table_name=table_name,
+             source=DBMetadata(
+                 name="polars",
+                 version="DataFrame",
+                 columns=OrderedDict(zip(
+                     data_frame.columns,
+                     [str(dtype) for dtype in data_frame.dtypes],
+                 )),
+             )
+         )
+
+     def refresh(self) -> None:
+         """Refresh the session."""
+
+         self.connect = Connection.connect(**self.connector._asdict())
+         self.cursor = self.connect.cursor()
+         self.copy_buffer.cursor = self.cursor
+         self.logger.info(f"Connection to host {self.connector.host} updated.")
+
+     def close(self) -> None:
+         """Close the session."""
+
+         self.cursor.close()
+         self.connect.close()
+         self.logger.info(f"Connection to host {self.connector.host} closed.")
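
Note: a minimal end-to-end sketch of the dumper (not from the package). The import path is hypothetical, and PGConnector is assumed to take standard libpq fields, since its _asdict() is passed to psycopg's Connection.connect:

    from pgpack_dumper import PGConnector, PGPackDumper  # hypothetical path

    connector = PGConnector(
        host="localhost",   # assumed field names
        port=5432,
        user="demo",
        password="demo",
        dbname="demo",
    )
    dumper = PGPackDumper(connector)

    # Dump a table into a PGPack file...
    with open("demo.pgpack", "wb") as fileobj:
        dumper.read_dump(fileobj, table_name="public.demo_table")

    # ...and load it back into another table.
    with open("demo.pgpack", "rb") as fileobj:
        dumper.write_dump(fileobj, table_name="public.demo_copy")

    dumper.close()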
@@ -0,0 +1 @@
+ __version__ = "0.3.3.3"