pgpack-dumper 0.3.0.0__cp314-cp314-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,67 @@
+ cdef class CopyReader:
+     """Read from an iterable Copy object."""
+ 
+     def __init__(
+         self,
+         object copyobj,
+     ):
+         """Class initialization."""
+ 
+         self.copyobj = copyobj
+         self.iterator = iter(self.copyobj.__enter__())
+         self.bufferobj = bytearray()
+         self.closed = False
+         self.total_read = 0
+ 
+     cpdef bytes read(self, long long size):
+         """Read up to size bytes from the copy stream."""
+ 
+         if self.closed:
+             raise RuntimeError("Copy object already closed.")
+ 
+         if size <= 0:
+             return b""
+ 
+         cdef object chunk
+         cdef Py_ssize_t buffer_len
+         cdef bytes result
+ 
+         try:
+             # Pull chunks until the buffer can serve the request.
+             while len(self.bufferobj) < size:
+                 chunk = next(self.iterator)
+                 self.bufferobj.extend(chunk)
+ 
+             result = bytes(self.bufferobj[:size])
+             del self.bufferobj[:size]
+             self.total_read += len(result)
+             return result
+ 
+         except StopIteration:
+             # Source exhausted: close and drain whatever remains buffered.
+             self.close()
+             buffer_len = len(self.bufferobj)
+ 
+             if buffer_len > 0:
+                 if size >= buffer_len:
+                     result = bytes(self.bufferobj)
+                     self.bufferobj = bytearray()
+                 else:
+                     result = bytes(self.bufferobj[:size])
+                     del self.bufferobj[:size]
+                 self.total_read += len(result)
+                 return result
+             return b""
+ 
+     cpdef long long tell(self):
+         """Return the current stream position."""
+ 
+         if self.closed:
+             raise RuntimeError("Copy object already closed.")
+ 
+         return self.total_read
+ 
+     cpdef void close(self):
+         """Close CopyReader."""
+ 
+         if not self.closed:
+             self.copyobj.__exit__(None, None, None)
+             self.closed = True
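
For orientation, a minimal sketch of driving CopyReader by hand, assuming a
psycopg connection; the table name is hypothetical, and in this package the
class is normally constructed by StreamReader (the next file in this diff):

    from psycopg import Connection

    with Connection.connect("dbname=demo") as conn:
        # cursor.copy() returns the not-yet-entered Copy context manager
        # that CopyReader.__init__ expects.
        copyobj = conn.cursor().copy(
            "COPY my_table TO STDOUT (FORMAT binary)",  # hypothetical table
        )
        reader = CopyReader(copyobj)
        header = reader.read(19)  # PGCOPY signature (11) + flags (4) + extension (4)
        print(reader.tell())      # 19
        reader.close()
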
@@ -0,0 +1,80 @@
+ from typing import (
+     Iterable,
+     NoReturn,
+ )
+ 
+ from pgcopylib import PGCopyReader
+ from pgpack import (
+     PGPackReader,
+     metadata_reader,
+ )
+ from psycopg import Copy
+ 
+ from .reader import CopyReader
+ 
+ 
+ class StreamReader(PGPackReader):
+     """Class for streaming reads from PostgreSQL/GreenPlum."""
+ 
+     def __init__(
+         self,
+         metadata: bytes,
+         copyobj: Iterable[Copy],
+     ) -> None:
+         """Class initialization."""
+ 
+         self.metadata = metadata
+         self.copyobj = CopyReader(copyobj)
+         (
+             self.columns,
+             self.pgtypes,
+             self.pgparam,
+         ) = metadata_reader(self.metadata)
+         self.pgcopy = PGCopyReader(
+             self.copyobj,
+             self.pgtypes,
+         )
+ 
+     def __str__(self) -> str:
+         """String representation of StreamReader."""
+ 
+         def to_col(text: str) -> str:
+             """Format string element."""
+ 
+             text = text[:14] + "…" if len(text) > 15 else text
+             return f" {text: <15} "
+ 
+         empty_line = (
+             "│-----------------+-----------------│"
+         )
+         end_line = (
+             "└─────────────────┴─────────────────┘"
+         )
+         _str = [
+             "<PostgreSQL/GreenPlum stream reader>",
+             "┌─────────────────┬─────────────────┐",
+             "│ Column Name     │ PostgreSQL Type │",
+             "╞═════════════════╪═════════════════╡",
+         ]
+ 
+         for column, pgtype in zip(self.columns, self.pgtypes):
+             _str.append(
+                 f"│{to_col(column)}│{to_col(pgtype.name)}│",
+             )
+             _str.append(empty_line)
+ 
+         _str[-1] = end_line
+         return "\n".join(_str) + f"""
+ Total columns: {len(self.columns)}
+ Rows read: {self.pgcopy.num_rows}
+ """
+ 
+     def to_bytes(self) -> NoReturn:
+         """Get raw unpacked pgcopy data."""
+ 
+         raise NotImplementedError("Not supported in stream mode.")
+ 
+     def close(self) -> None:
+         """Close stream object."""
+ 
+         self.copyobj.close()
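
A hedged sketch of consuming a StreamReader; in this package it is produced by
PGPackDumper.to_reader() (see the dumper file later in this diff), and
to_rows() is assumed here from the inherited pgpack PGPackReader API:

    reader = dumper.to_reader(table_name="public.events")  # hypothetical table
    print(reader)                 # column/type table plus the row counters
    for row in reader.to_rows():  # assumed PGPackReader method
        handle(row)               # hypothetical row handler
    reader.close()                # releases the underlying Copy object
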
@@ -0,0 +1,34 @@
+ from enum import Enum
+ from typing import NamedTuple
+ 
+ 
+ class RelClass(NamedTuple):
+     """Postgres objects."""
+ 
+     rel_name: str
+     is_readobject: bool
+     is_readable: bool
+ 
+ 
+ class PGObject(RelClass, Enum):
+     """RelClass object from relkind value."""
+ 
+     r = RelClass("Relation table", True, True)
+     i = RelClass("Index", False, False)
+     S = RelClass("Sequence", False, False)
+     t = RelClass("Toast table", False, False)
+     v = RelClass("View", False, True)
+     m = RelClass("Materialized view", False, True)
+     c = RelClass("Composite type", False, False)
+     f = RelClass("Foreign table", False, True)
+     p = RelClass("Partitioned table", True, True)
+     I = RelClass("Partitioned index", False, True)  # noqa: E741
+     u = RelClass("Temporary table", True, True)
+     o = RelClass("Optimized files", False, False)
+     b = RelClass("Block directory", False, False)
+     M = RelClass("Visibility map", False, False)
+ 
+     def __str__(self) -> str:
+         """String representation of the class."""
+ 
+         return self.rel_name
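
Member names deliberately mirror pg_class.relkind characters, so a catalog
value maps straight to a description and its read flags:

    kind = PGObject["r"]           # look up by relkind character
    print(kind)                    # Relation table (via __str__)
    print(kind.is_readable)        # True
    print(PGObject["S"].rel_name)  # Sequence
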
@@ -0,0 +1,460 @@
+ from collections import OrderedDict
+ from collections.abc import Generator
+ from io import (
+     BufferedReader,
+     BufferedWriter,
+ )
+ from logging import Logger
+ from types import MethodType
+ from typing import (
+     Any,
+     Iterable,
+     Iterator,
+     Union,
+ )
+ 
+ from pgcopylib import PGCopyWriter
+ from pgpack import (
+     CompressionMethod,
+     PGPackReader,
+     PGPackWriter,
+     metadata_reader,
+ )
+ from psycopg import (
+     Connection,
+     Copy,
+     Cursor,
+ )
+ from pandas import DataFrame as PdFrame
+ from polars import DataFrame as PlFrame
+ from sqlparse import format as sql_format
+ 
+ from .common import (
+     CopyBuffer,
+     DBMetadata,
+     DumperLogger,
+     PGConnector,
+     PGPackDumperError,
+     PGPackDumperReadError,
+     PGPackDumperWriteError,
+     PGPackDumperWriteBetweenError,
+     StreamReader,
+     chunk_query,
+     make_columns,
+     query_template,
+     transfer_diagram,
+ )
+ 
+ 
+ class PGPackDumper:
+     """Class for reading and writing the PGPack format."""
+ 
+     def __init__(
+         self,
+         connector: PGConnector,
+         compression_method: CompressionMethod = CompressionMethod.ZSTD,
+         logger: Logger | None = None,
+     ) -> None:
+         """Class initialization."""
+ 
+         if not logger:
+             logger = DumperLogger()
+ 
+         try:
+             self.connector: PGConnector = connector
+             self.compression_method: CompressionMethod = compression_method
+             self.logger = logger
+             self.connect: Connection = Connection.connect(
+                 **self.connector._asdict()
+             )
+             self.cursor: Cursor = self.connect.cursor()
+             self.copy_buffer: CopyBuffer = CopyBuffer(self.cursor, self.logger)
+             self._dbmeta: DBMetadata | None = None
+             self._size = 0
+         except Exception as error:
+             self.logger.error(f"{error.__class__.__name__}: {error}")
+             raise PGPackDumperError(error) from error
+ 
+         self.cursor.execute(query_template("dbname"))
+         self.dbname = self.cursor.fetchone()[0]
+         self.version = (
+             f"{self.connect.info.server_version // 10000}."
+             f"{self.connect.info.server_version % 10000}"
+         )
+ 
+         if self.dbname == "greenplum":
+             self.cursor.execute(query_template("gpversion"))
+             gpversion = self.cursor.fetchone()[0]
+             self.version = f"{self.version} gp {gpversion}"
+ 
+         self.logger.info(
+             f"PGPackDumper initialized for host {self.connector.host} "
+             f"[version {self.version}]"
+         )
+ 
+     @staticmethod
+     def multiquery(dump_method: MethodType):
+         """Multiquery decorator."""
+ 
+         def wrapper(*args, **kwargs):
+ 
+             first_part: list[str]
+             second_part: list[str]
+ 
+             self: PGPackDumper = args[0]
+             cursor: Cursor = (kwargs.get("dumper_src") or self).cursor
+             query: str | None = kwargs.get("query_src") or kwargs.get("query")
+             part: int = 1
+             first_part, second_part = chunk_query(self.query_formatter(query))
+             total_parts = (len(first_part) + len(second_part)) or 1
+ 
+             if first_part:
+                 self.logger.info("Multiquery detected.")
+ 
+                 for query in first_part:
+                     self.logger.info(f"Execute query {part}/{total_parts}")
+                     cursor.execute(query)
+                     part += 1
+ 
+             if second_part:
+                 # The first remaining statement becomes the streamed query.
+                 for key in ("query", "query_src"):
+                     if key in kwargs:
+                         kwargs[key] = second_part.pop(0)
+                         break
+ 
+             self.logger.info(
+                 f"Execute stream {part}/{total_parts} [pgcopy mode]"
+             )
+             output = dump_method(*args, **kwargs)
+ 
+             if second_part:
+                 for query in second_part:
+                     part += 1
+                     self.logger.info(f"Execute query {part}/{total_parts}")
+                     cursor.execute(query)
+ 
+             if output:
+                 self.refresh()
+ 
+             return output
+ 
+         return wrapper
+ 
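
The wrapper assumes that chunk_query (imported from .common, not shown in this
diff) splits a multi-statement script into the statements to run before the
stream and the remainder, whose first element becomes the streamed query. A
hypothetical call illustrating the resulting execution order:

    dumper.read_dump(
        fileobj=open("events.pgpack", "wb"),
        query="""
            CREATE TEMP TABLE t AS SELECT * FROM events;  -- executed first
            SELECT * FROM t;                              -- streamed via pgcopy
            DROP TABLE t;                                 -- executed after the stream
        """,
    )
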
+     def query_formatter(self, query: str | None) -> str | None:
+         """Reformat query."""
+ 
+         if not query:
+             return
+         return sql_format(sql=query, strip_comments=True).strip().strip(";")
+ 
+     @multiquery
+     def __read_dump(
+         self,
+         fileobj: BufferedWriter,
+         query: str | None,
+         table_name: str | None,
+     ) -> bool:
+         """Internal read_dump; generates kwargs for the decorator."""
+ 
+         def __read_data(
+             copy_to: Iterator[Copy],
+         ) -> Generator[bytes, None, None]:
+             """Yield bytes from the copy object while tracking total size."""
+ 
+             self._size = 0
+ 
+             for data in copy_to:
+                 chunk = bytes(data)
+                 self._size += len(chunk)
+                 yield chunk
+ 
+         try:
+             self.copy_buffer.query = query
+             self.copy_buffer.table_name = table_name
+             metadata = self.copy_buffer.metadata
+             pgpack = PGPackWriter(
+                 fileobj,
+                 metadata,
+                 self.compression_method,
+             )
+             columns = make_columns(*metadata_reader(metadata))
+             source = DBMetadata(
+                 name=self.dbname,
+                 version=self.version,
+                 columns=columns,
+             )
+             destination = DBMetadata(
+                 name="file",
+                 version=fileobj.name,
+                 columns=columns,
+             )
+             self.logger.info(transfer_diagram(source, destination))
+ 
+             with self.copy_buffer.copy_to() as copy_to:
+                 pgpack.from_bytes(__read_data(copy_to))
+ 
+             pgpack.close()
+             self.logger.info(f"Successfully read {self._size} bytes.")
+             self.logger.info(
+                 f"Read pgpack dump from {self.connector.host} done."
+             )
+             return True
+         except Exception as error:
+             self.logger.error(f"{error.__class__.__name__}: {error}")
+             raise PGPackDumperReadError(error) from error
+ 
+     @multiquery
+     def __write_between(
+         self,
+         table_dest: str,
+         table_src: str | None,
+         query_src: str | None,
+         dumper_src: Union["PGPackDumper", object],
+     ) -> bool:
+         """Internal write_between; generates kwargs for the decorator."""
+ 
+         try:
+             if not dumper_src:
+                 connect = Connection.connect(**self.connector._asdict())
+                 self.logger.info(
+                     f"Set new connection for host {self.connector.host}."
+                 )
+                 source_copy_buffer = CopyBuffer(
+                     connect.cursor(),
+                     self.logger,
+                     query_src,
+                     table_src,
+                 )
+                 src_dbname = self.dbname
+                 src_version = self.version
+             elif dumper_src.__class__ is PGPackDumper:
+                 source_copy_buffer = dumper_src.copy_buffer
+                 source_copy_buffer.table_name = table_src
+                 source_copy_buffer.query = query_src
+                 src_dbname = dumper_src.dbname
+                 src_version = dumper_src.version
+             else:
+                 # Foreign dumper object: pull rows through its reader API.
+                 reader = dumper_src.to_reader(
+                     query=query_src,
+                     table_name=table_src,
+                 )
+                 dtype_data = reader.to_rows()
+                 self.from_rows(
+                     dtype_data=dtype_data,
+                     table_name=table_dest,
+                     source=dumper_src._dbmeta,
+                 )
+                 size = reader.tell()
+                 self.logger.info(f"Successfully sent {size} bytes.")
+                 return reader.close()
+ 
+             self.copy_buffer.table_name = table_dest
+             source = DBMetadata(
+                 name=src_dbname,
+                 version=src_version,
+                 columns=make_columns(
+                     *metadata_reader(source_copy_buffer.metadata),
+                 ),
+             )
+             destination = DBMetadata(
+                 name=self.dbname,
+                 version=self.version,
+                 columns=make_columns(
+                     *metadata_reader(self.copy_buffer.metadata),
+                 ),
+             )
+             self.logger.info(transfer_diagram(source, destination))
+             self.copy_buffer.copy_between(source_copy_buffer)
+             self.connect.commit()
+             return True
+         except Exception as error:
+             self.logger.error(f"{error.__class__.__name__}: {error}")
+             raise PGPackDumperWriteBetweenError(error) from error
+ 
+     @multiquery
+     def __to_reader(
+         self,
+         query: str | None,
+         table_name: str | None,
+     ) -> StreamReader:
+         """Internal to_reader; generates kwargs for the decorator."""
+ 
+         self.copy_buffer.query = query
+         self.copy_buffer.table_name = table_name
+         metadata = self.copy_buffer.metadata
+         self._dbmeta = DBMetadata(
+             name=self.dbname,
+             version=self.version,
+             columns=make_columns(
+                 *metadata_reader(metadata),
+             ),
+         )
+         return StreamReader(
+             metadata,
+             self.copy_buffer.copy_to(),
+         )
+ 
+     def read_dump(
+         self,
+         fileobj: BufferedWriter,
+         query: str | None = None,
+         table_name: str | None = None,
+     ) -> bool:
+         """Read PGPack dump from PostgreSQL/GreenPlum."""
+ 
+         return self.__read_dump(
+             fileobj=fileobj,
+             query=query,
+             table_name=table_name,
+         )
+ 
+     def write_dump(
+         self,
+         fileobj: BufferedReader,
+         table_name: str,
+     ) -> None:
+         """Write PGPack dump into PostgreSQL/GreenPlum."""
+ 
+         try:
+             self.copy_buffer.table_name = table_name
+             pgpack = PGPackReader(fileobj)
+             source = DBMetadata(
+                 name="file",
+                 version=fileobj.name,
+                 columns=make_columns(
+                     pgpack.columns,
+                     pgpack.pgtypes,
+                     pgpack.pgparam,
+                 ),
+             )
+             destination = DBMetadata(
+                 name=self.dbname,
+                 version=self.version,
+                 columns=make_columns(
+                     *metadata_reader(self.copy_buffer.metadata),
+                 ),
+             )
+             self.logger.info(transfer_diagram(source, destination))
+             self.copy_buffer.copy_from(pgpack.to_bytes())
+             self.connect.commit()
+             pgpack.close()
+             self.refresh()
+         except Exception as error:
+             self.logger.error(f"{error.__class__.__name__}: {error}")
+             raise PGPackDumperWriteError(error) from error
+ 
+     def write_between(
+         self,
+         table_dest: str,
+         table_src: str | None = None,
+         query_src: str | None = None,
+         dumper_src: Union["PGPackDumper", object] = None,
+     ) -> bool | None:
+         """Write from PostgreSQL/GreenPlum into PostgreSQL/GreenPlum."""
+ 
+         return self.__write_between(
+             table_dest=table_dest,
+             table_src=table_src,
+             query_src=query_src,
+             dumper_src=dumper_src,
+         )
+ 
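
A sketch of a server-to-server transfer with write_between, assuming
PGConnector is a NamedTuple of psycopg connect kwargs; only the host field is
confirmed by this diff, the rest are assumptions, as is the import path:

    from pgpack_dumper import PGConnector, PGPackDumper

    src = PGPackDumper(PGConnector(
        host="src-host", port=5432, user="postgres",
        password="secret", dbname="demo",
    ))
    dst = PGPackDumper(PGConnector(
        host="dst-host", port=5432, user="postgres",
        password="secret", dbname="demo",
    ))

    # Stream public.events from src into dst via pgcopy, with no local file.
    dst.write_between(
        table_dest="public.events",
        table_src="public.events",
        dumper_src=src,
    )
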
+     def to_reader(
+         self,
+         query: str | None = None,
+         table_name: str | None = None,
+     ) -> StreamReader:
+         """Get stream from PostgreSQL/GreenPlum as a StreamReader object."""
+ 
+         return self.__to_reader(
+             query=query,
+             table_name=table_name,
+         )
+ 
+     def from_rows(
+         self,
+         dtype_data: Iterable[Any],
+         table_name: str,
+         source: DBMetadata | None = None,
+     ) -> None:
+         """Write from a python iterable object
+         into a PostgreSQL/GreenPlum table."""
+ 
+         if not source:
+             source = DBMetadata(
+                 name="python",
+                 version="iterable object",
+                 columns={"Unknown": "Unknown"},
+             )
+ 
+         self.copy_buffer.table_name = table_name
+         columns, pgtypes, pgparam = metadata_reader(self.copy_buffer.metadata)
+         writer = PGCopyWriter(None, pgtypes)
+         destination = DBMetadata(
+             name=self.dbname,
+             version=self.version,
+             columns=make_columns(
+                 list_columns=columns,
+                 pgtypes=pgtypes,
+                 pgparam=pgparam,
+             ),
+         )
+         self.logger.info(transfer_diagram(source, destination))
+         self.copy_buffer.copy_from(writer.from_rows(dtype_data))
+         self.connect.commit()
+         self.refresh()
+ 
+     def from_pandas(
+         self,
+         data_frame: PdFrame,
+         table_name: str,
+     ) -> None:
+         """Write from pandas.DataFrame into a PostgreSQL/GreenPlum table."""
+ 
+         self.from_rows(
+             dtype_data=iter(data_frame.values),
+             table_name=table_name,
+             source=DBMetadata(
+                 name="pandas",
+                 version="DataFrame",
+                 columns=OrderedDict(zip(
+                     data_frame.columns,
+                     [str(dtype) for dtype in data_frame.dtypes],
+                 )),
+             )
+         )
+ 
+     def from_polars(
+         self,
+         data_frame: PlFrame,
+         table_name: str,
+     ) -> None:
+         """Write from polars.DataFrame into a PostgreSQL/GreenPlum table."""
+ 
+         self.from_rows(
+             dtype_data=data_frame.iter_rows(),
+             table_name=table_name,
+             source=DBMetadata(
+                 name="polars",
+                 version="DataFrame",
+                 columns=OrderedDict(zip(
+                     data_frame.columns,
+                     [str(dtype) for dtype in data_frame.dtypes],
+                 )),
+             )
+         )
+ 
+     def refresh(self) -> None:
+         """Refresh session."""
+ 
+         # Drop the previous session before opening a new one so that
+         # refreshed connections do not accumulate until garbage collection.
+         self.cursor.close()
+         self.connect.close()
+         self.connect = Connection.connect(**self.connector._asdict())
+         self.cursor = self.connect.cursor()
+         self.copy_buffer.cursor = self.cursor
+         self.logger.info(f"Connection to host {self.connector.host} updated.")
+ 
+     def close(self) -> None:
+         """Close session."""
+ 
+         self.cursor.close()
+         self.connect.close()
+         self.logger.info(f"Connection to host {self.connector.host} closed.")
@@ -0,0 +1 @@
+ __version__ = "0.3.0.0"