maisaedu-poormans-dms 1.1.76__tar.gz → 1.1.78__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
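In summary, 1.1.78 refactors the Redshift migration's reader layer: the monolithic redshift_migration/Reader.py is replaced by a Reader/ package (GenericReader carries the chunked extract-to-S3 plumbing, PostgresReader the Postgres-specific SQL generation, and __init__.py a factory), a ReaderInterface contract is added under Contracts/, and Connector now distinguishes Postgres from SQL Server sources. GenericReader also uploads each chunk from its own worker thread. SQL Server support is scaffolding only: the reader factory returns None for it, and Connector.connect_source leaves that branch as a TODO.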
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/PKG-INFO +1 -1
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Connector.py +20 -5
- maisaedu-poormans-dms-1.1.78/maisaedu_poormans_dms/redshift_migration/Contracts/ReaderInterface.py +22 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/MigratorRedshift.py +4 -4
- maisaedu-poormans-dms-1.1.78/maisaedu_poormans_dms/redshift_migration/Reader/GenericReader.py +104 -0
- maisaedu-poormans-dms-1.1.78/maisaedu_poormans_dms/redshift_migration/Reader/PostgresReader.py +82 -0
- maisaedu-poormans-dms-1.1.78/maisaedu_poormans_dms/redshift_migration/Reader/__init__.py +9 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Types.py +2 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms.egg-info/PKG-INFO +1 -1
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms.egg-info/SOURCES.txt +4 -1
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/setup.py +1 -1
- maisaedu-poormans-dms-1.1.76/maisaedu_poormans_dms/redshift_migration/Reader.py +0 -166
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/LICENSE +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorInterface.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorRowInterface.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorRowReaderInterface.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorRowWriterInterface.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorTableInterface.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Migrator.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/MigratorRow/MigratorRow.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/MigratorRow/Reader.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/MigratorRow/Writer.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/MigratorRow/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/MigratorTable.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Contracts/WriterInterface.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Contracts/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Logger.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Models/ExtractionOperation.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Models/Struct.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Models/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Services/AdapterSourceTarget.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Services/ExtractionOperation.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Services/RelationExtraction.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Services/Struct.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Services/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Writer/GenericWriter.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Writer/WriterCDC.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Writer/WriterNonCDC.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Writer/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/sql_server_migration.py +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms.egg-info/dependency_links.txt +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms.egg-info/requires.txt +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms.egg-info/top_level.txt +0 -0
- {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/setup.cfg +0 -0
{maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Connector.py
@@ -3,17 +3,28 @@ import psycopg2
 from sqlalchemy import create_engine
 
 from maisaedu_utilities_prefect.dw import get_red_credentials
-from .Types import DEV, LOCAL, get_iam_role
+from .Types import DEV, LOCAL, POSTGRES, SQLSERVER, get_iam_role
 
 
 class Connector:
     def __init__(self, env, s3_credentials, source_credentials, target_credentials):
         self.source_credentials = source_credentials
+        self.__set_source_conn_type()
         self.target_credentials = target_credentials
         self.s3_credentials = s3_credentials
         self.env = env
         self.iam_role = get_iam_role(env)
 
+    def __set_source_conn_type(self):
+        if self.source_credentials is None:
+            self.source_conn_type = None
+        else:
+            if 'type' not in self.source_credentials:
+                self.source_conn_type = POSTGRES
+            else:
+                self.source_conn_type = self.source_credentials['type']
+
+
     def connect_target(self):
         if self.target_credentials is None:
             if self.env == LOCAL:
@@ -48,10 +59,14 @@ class Connector:
         self.s3_session = session.resource("s3")
 
     def connect_source(self):
-        [4 lines removed here; their content was not preserved in this extract]
+        if self.source_conn_type == POSTGRES:
+            engine = create_engine(
+                f"postgresql+psycopg2://{self.source_credentials['user']}:{self.source_credentials['password']}@{self.source_credentials['host']}:{self.source_credentials['port']}/{self.source_credentials['database']}"
+            )
+            self.source_conn = engine.connect().execution_options(stream_results=True)
+        elif self.source_conn_type == SQLSERVER:
+            print("SQLSERVER")
+            # TODO
 
     def close_source(self):
         self.source_conn.close()
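The new __set_source_conn_type is small but load-bearing: it decides which reader the factory (added in Reader/__init__.py below) will build. A minimal standalone sketch of the same logic, assuming the POSTGRES and SQLSERVER constants added to Types.py are plain strings (their values are among the +2 lines not shown in this diff):

    # Sketch of Connector.__set_source_conn_type in isolation.
    POSTGRES = "postgres"    # assumed value; only the name appears in the diff
    SQLSERVER = "sqlserver"  # assumed value; only the name appears in the diff

    def resolve_source_conn_type(source_credentials):
        if source_credentials is None:
            return None
        # Credentials without an explicit "type" keep the old Postgres-only behavior.
        return source_credentials.get("type", POSTGRES)

    assert resolve_source_conn_type(None) is None
    assert resolve_source_conn_type({"host": "db", "user": "u"}) == POSTGRES
    assert resolve_source_conn_type({"type": SQLSERVER}) == SQLSERVER

Defaulting to POSTGRES when "type" is absent keeps every existing caller working unchanged.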
maisaedu-poormans-dms-1.1.78/maisaedu_poormans_dms/redshift_migration/Contracts/ReaderInterface.py ADDED
@@ -0,0 +1,22 @@
+from abc import ABC, abstractmethod
+
+class ReaderInterface(ABC):
+    @abstractmethod
+    def get_incremental_statement(self) -> str:
+        pass
+
+    @abstractmethod
+    def get_columns_source(self) -> str:
+        pass
+
+    @abstractmethod
+    def get_order_by_sql_statement(self) -> str:
+        pass
+
+    @abstractmethod
+    def get_limit_sql_statement(self) -> str:
+        pass
+
+    @abstractmethod
+    def get_sql_statement(self) -> str:
+        pass
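Since ReaderInterface subclasses ABC and marks all five methods abstract, an incomplete reader fails at instantiation rather than on first use. A small illustration (IncompleteReader is hypothetical, not part of the package):

    from abc import ABC, abstractmethod

    class ReaderInterface(ABC):
        @abstractmethod
        def get_sql_statement(self) -> str:
            pass
        # (the other four abstract methods elided for brevity)

    class IncompleteReader(ReaderInterface):
        pass

    try:
        IncompleteReader()
    except TypeError as exc:
        print(exc)  # can't instantiate abstract class IncompleteReader ...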
{maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/MigratorRedshift.py
@@ -1,7 +1,7 @@
 from .Connector import Connector
-from .Reader import Reader
+from .Reader import factory as reader_factory
 from .Logger import Logger
-from .Writer import factory
+from .Writer import factory as writer_factory
 from .Services.Struct import Struct
 from .Services.ExtractionOperation import ExtractionOperation
 from .Services.RelationExtraction import RelationExtraction
@@ -38,7 +38,7 @@ class MigratorRedshift:
 
         self.migrator_redshift_connector.connect_target()
 
-        self.migrator_redshift_reader = Reader(
+        self.migrator_redshift_reader = reader_factory(
             s3_credentials=s3_credentials,
             struct=struct,
             migrator_redshift_connector=self.migrator_redshift_connector,
@@ -150,7 +150,7 @@ class MigratorRedshift:
         self.__check_target_table_has_data()
         update_by_cdc = self.__check_table_will_be_updated_by_cdc(load_option)
 
-        self.migrator_redshift_writer = factory(
+        self.migrator_redshift_writer = writer_factory(
             env=self.env,
             update_by_cdc=update_by_cdc,
             struct=self.struct,
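Both the Reader and Writer packages now export a module-level factory, so MigratorRedshift imports them under distinct aliases (reader_factory, writer_factory) to avoid the name collision; the two construction sites change only in which callable they invoke, while the keyword arguments are untouched.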
maisaedu-poormans-dms-1.1.78/maisaedu_poormans_dms/redshift_migration/Reader/GenericReader.py ADDED
@@ -0,0 +1,104 @@
+import io
+import threading
+import pandas as pd
+from datetime import datetime
+from ..Types import (
+    target_type_is_numeric,
+    LOCAL,
+    FULL,
+    PROD,
+    INCREMENTAL,
+    SAVED_S3,
+    PREFECT,
+    S3,
+)
+from ..Services.ExtractionOperation import ExtractionOperation
+from ..Services.AdapterSourceTarget import AdapterSourceTarget
+from ..Models.ExtractionOperation import ExtractionOperation as ExtractionOperationModel
+
+
+class GenericReader:
+    def __init__(self, s3_credentials, struct, migrator_redshift_connector):
+        self.struct = struct
+        self.s3_credentials = s3_credentials
+        self.migrator_redshift_connector = migrator_redshift_connector
+
+    def __save_on_bucket(self, df, path_file, format="parquet"):
+        buffer = io.BytesIO()
+
+        if format == "csv":
+            df.to_csv(buffer, index=False)
+        else:
+            df.to_parquet(buffer, index=False, engine="pyarrow")
+        self.migrator_redshift_connector.s3_session.Object(
+            self.s3_credentials["bucket"],
+            path_file,
+        ).put(Body=buffer.getvalue())
+
+        buffer.close()
+
+    def __process_chunk(self, chunk_df, path_file, path_file_tmp):
+        adapter = AdapterSourceTarget(self.struct)
+        chunk_df_s3 = chunk_df.copy()
+
+        chunk_df_s3 = adapter.transform_data(chunk_df_s3, target_save=S3)
+
+        self.__save_on_bucket(chunk_df_s3, path_file)
+
+        chunk_df = adapter.convert_types(chunk_df)
+        chunk_df = adapter.transform_data(chunk_df)
+        chunk_df = adapter.equalize_number_columns(chunk_df)
+
+        self.__save_on_bucket(chunk_df, path_file_tmp, format="csv")
+
+    def save_data_to_s3(self, load_option=None):
+        self.load_option = load_option
+        self.migrator_redshift_connector.connect_s3()
+        self.migrator_redshift_connector.connect_source()
+
+        sql = self.get_sql_statement()
+
+        time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+        idx = 1
+        path_file = None
+        threads = []
+
+        for chunk_df in pd.read_sql(
+            sql,
+            self.migrator_redshift_connector.source_conn,
+            chunksize=self.struct.read_batch_size,
+        ):
+            if len(chunk_df) != 0:
+                path_file = f"raw/prefect/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/{idx}.parquet"
+                path_file_tmp = f"raw/tmp/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/{idx}.csv"
+
+                thread = threading.Thread(target=self.__process_chunk, args=(chunk_df, path_file, path_file_tmp))
+                thread.start()
+                threads.append(thread)
+
+                idx = idx + 1
+
+        for thread in threads:
+            thread.join()
+
+        self.migrator_redshift_connector.close_source()
+
+        if path_file is None:
+            return None
+        else:
+            url = f's3://{self.s3_credentials["bucket"]}/raw/prefect/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/'
+
+            ExtractionOperation(
+                conn=self.migrator_redshift_connector.target_conn,
+            ).create(
+                struct=self.struct,
+                url=url,
+                load_option=self.load_option,
+                status=SAVED_S3,
+                platform=self.struct.extraction_engine,
+            )
+
+            return ExtractionOperationModel(
+                url=url,
+                load_option=self.load_option,
+            )
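Functionally, GenericReader.save_data_to_s3 matches the old Reader.py except that each non-empty chunk is now transformed and uploaded on its own thread, with a join barrier before the source connection is closed. The fan-out/join shape, reduced to a runnable sketch (process_chunk stands in for __process_chunk):

    import threading

    def process_chunk(chunk, idx):
        # Stand-in for GenericReader.__process_chunk: transform and upload one chunk.
        print(f"chunk {idx}: sum={sum(chunk)}")

    chunks = [[1, 2], [3, 4], [5]]
    threads = []
    for idx, chunk in enumerate(chunks, start=1):
        t = threading.Thread(target=process_chunk, args=(chunk, idx))
        t.start()
        threads.append(t)

    for t in threads:  # join before closing the source, as save_data_to_s3 does
        t.join()

Note that the thread count is unbounded (one thread per non-empty chunk); there is no pool or semaphore in this version, so memory pressure grows with the number of in-flight chunks.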
maisaedu-poormans-dms-1.1.78/maisaedu_poormans_dms/redshift_migration/Reader/PostgresReader.py ADDED
@@ -0,0 +1,82 @@
+from ..Types import (
+    target_type_is_numeric,
+    LOCAL,
+    FULL,
+    INCREMENTAL,
+)
+from .GenericReader import GenericReader
+from ..Contracts.ReaderInterface import ReaderInterface
+
+class PostgresReader(GenericReader, ReaderInterface):
+    def get_incremental_statement(self):
+        if (
+            (
+                self.struct.source_incremental_column is not None
+                and self.struct.target_incremental_column is not None
+                and (self.load_option is None)
+            ) or (self.load_option == INCREMENTAL)
+        ):
+            sql = f"""
+                select max("{self.struct.target_incremental_column}") as max_value
+                from "{self.struct.target_schema}"."{self.struct.target_table}"
+            """
+
+            cursor = self.migrator_redshift_connector.target_conn.cursor()
+
+            cursor.execute(sql)
+            result = cursor.fetchall()
+
+            if len(result) == 0 or result[0][0] is None:
+                sql_return = ""
+                self.load_option = FULL
+            else:
+                for c in self.struct.columns:
+                    if c["target_name"] == self.struct.target_incremental_column:
+                        target_type = c["target_type"]
+
+                if target_type_is_numeric(target_type):
+                    sql_return = f'and "{self.struct.source_incremental_column}" > {result[0][0]}'
+                else:
+                    if (
+                        self.struct.incremental_interval_delta is None
+                        or self.struct.incremental_interval_delta == ""
+                    ):
+                        sql_return = f"and \"{self.struct.source_incremental_column}\" > '{result[0][0]}'"
+                    else:
+                        sql_return = f"and \"{self.struct.source_incremental_column}\" >= '{result[0][0]}'::timestamp - interval '{self.struct.incremental_interval_delta}'"
+
+                self.load_option = INCREMENTAL
+
+            cursor.close()
+
+            return sql_return
+        else:
+            if (self.load_option is None):
+                self.load_option = FULL
+            return ""
+
+    def get_columns_source(self):
+        return " * "
+
+    def get_order_by_sql_statement(self):
+        if self.struct.source_incremental_column is not None:
+            return f' order by "{self.struct.source_incremental_column}" asc'
+        else:
+            return ""
+
+    def get_limit_sql_statement(self):
+        if self.migrator_redshift_connector.env == LOCAL:
+            return f" limit 100"
+        else:
+            return f""
+
+    def get_sql_statement(self):
+        sql = f"""
+            select {self.get_columns_source()}
+            from "{self.struct.source_schema}"."{self.struct.source_table}"
+            where 1=1
+            {self.get_incremental_statement()}
+            {self.get_order_by_sql_statement()}
+            {self.get_limit_sql_statement()}
+        """
+        return sql
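For reference, the statement get_sql_statement assembles has the following shape. The schema, table, and column names below are made up, and the incremental predicate shown is the non-numeric, no-delta branch:

    # Illustrative only; names stand in for the self.struct fields the reader uses.
    source_schema, source_table = "public", "orders"
    incremental_column, max_value = "updated_at", "2024-01-01 00:00:00"

    sql = f"""
        select *
        from "{source_schema}"."{source_table}"
        where 1=1
        and "{incremental_column}" > '{max_value}'
        order by "{incremental_column}" asc
    """
    print(sql)

Values are interpolated directly into the SQL rather than bound as parameters; the interpolated value comes from max() over the target table rather than from user input, which limits the usual injection concerns.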
maisaedu-poormans-dms-1.1.78/maisaedu_poormans_dms/redshift_migration/Reader/__init__.py ADDED
@@ -0,0 +1,9 @@
+from .PostgresReader import PostgresReader
+from ..Types import POSTGRES, SQLSERVER
+
+
+def factory(s3_credentials, struct, migrator_redshift_connector):
+    if migrator_redshift_connector.source_conn_type == POSTGRES:
+        return PostgresReader(s3_credentials, struct, migrator_redshift_connector)
+    elif migrator_redshift_connector.source_conn_type == SQLSERVER:
+        return None
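Note the asymmetry in the factory: a Postgres source gets a PostgresReader, while a SQL Server source currently yields None, so MigratorRedshift would fail on the first method call against the reader for such a source. Until a SQL Server reader class lands, only Postgres sources are usable end to end.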
{maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms.egg-info/SOURCES.txt
@@ -23,14 +23,17 @@ maisaedu_poormans_dms/postgres_migration/MigratorRow/__init__.py
 maisaedu_poormans_dms/redshift_migration/Connector.py
 maisaedu_poormans_dms/redshift_migration/Logger.py
 maisaedu_poormans_dms/redshift_migration/MigratorRedshift.py
-maisaedu_poormans_dms/redshift_migration/Reader.py
 maisaedu_poormans_dms/redshift_migration/Types.py
 maisaedu_poormans_dms/redshift_migration/__init__.py
+maisaedu_poormans_dms/redshift_migration/Contracts/ReaderInterface.py
 maisaedu_poormans_dms/redshift_migration/Contracts/WriterInterface.py
 maisaedu_poormans_dms/redshift_migration/Contracts/__init__.py
 maisaedu_poormans_dms/redshift_migration/Models/ExtractionOperation.py
 maisaedu_poormans_dms/redshift_migration/Models/Struct.py
 maisaedu_poormans_dms/redshift_migration/Models/__init__.py
+maisaedu_poormans_dms/redshift_migration/Reader/GenericReader.py
+maisaedu_poormans_dms/redshift_migration/Reader/PostgresReader.py
+maisaedu_poormans_dms/redshift_migration/Reader/__init__.py
 maisaedu_poormans_dms/redshift_migration/Services/AdapterSourceTarget.py
 maisaedu_poormans_dms/redshift_migration/Services/ExtractionOperation.py
 maisaedu_poormans_dms/redshift_migration/Services/RelationExtraction.py
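For comparison, the deleted Reader.py follows. Its save_on_bucket and save_data_to_s3 bodies moved to GenericReader (renamed to private methods, plus the threading change noted above), and its SQL-generation methods moved verbatim to PostgresReader.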
maisaedu-poormans-dms-1.1.76/maisaedu_poormans_dms/redshift_migration/Reader.py DELETED
@@ -1,166 +0,0 @@
-import io
-import pandas as pd
-from datetime import datetime
-from .Types import (
-    target_type_is_numeric,
-    LOCAL,
-    FULL,
-    PROD,
-    INCREMENTAL,
-    SAVED_S3,
-    PREFECT,
-    S3,
-)
-from .Services.ExtractionOperation import ExtractionOperation
-from .Services.AdapterSourceTarget import AdapterSourceTarget
-from .Models.ExtractionOperation import ExtractionOperation as ExtractionOperationModel
-
-
-class Reader:
-    def __init__(self, s3_credentials, struct, migrator_redshift_connector):
-        self.struct = struct
-        self.s3_credentials = s3_credentials
-        self.migrator_redshift_connector = migrator_redshift_connector
-
-    def get_incremental_statement(self):
-        if (
-            (
-                self.struct.source_incremental_column is not None
-                and self.struct.target_incremental_column is not None
-                and (self.load_option is None)
-            ) or (self.load_option == INCREMENTAL)
-        ):
-            sql = f"""
-                select max("{self.struct.target_incremental_column}") as max_value
-                from "{self.struct.target_schema}"."{self.struct.target_table}"
-            """
-
-            cursor = self.migrator_redshift_connector.target_conn.cursor()
-
-            cursor.execute(sql)
-            result = cursor.fetchall()
-
-            if len(result) == 0 or result[0][0] is None:
-                sql_return = ""
-                self.load_option = FULL
-            else:
-                for c in self.struct.columns:
-                    if c["target_name"] == self.struct.target_incremental_column:
-                        target_type = c["target_type"]
-
-                if target_type_is_numeric(target_type):
-                    sql_return = f'and "{self.struct.source_incremental_column}" > {result[0][0]}'
-                else:
-                    if (
-                        self.struct.incremental_interval_delta is None
-                        or self.struct.incremental_interval_delta == ""
-                    ):
-                        sql_return = f"and \"{self.struct.source_incremental_column}\" > '{result[0][0]}'"
-                    else:
-                        sql_return = f"and \"{self.struct.source_incremental_column}\" >= '{result[0][0]}'::timestamp - interval '{self.struct.incremental_interval_delta}'"
-
-                self.load_option = INCREMENTAL
-
-            cursor.close()
-
-            return sql_return
-        else:
-            if (self.load_option is None):
-                self.load_option = FULL
-            return ""
-
-    def get_columns_source(self):
-        return " * "
-
-    def get_order_by_sql_statement(self):
-        if self.struct.source_incremental_column is not None:
-            return f' order by "{self.struct.source_incremental_column}" asc'
-        else:
-            return ""
-
-    def get_limit_sql_statement(self):
-        if self.migrator_redshift_connector.env == LOCAL:
-            return f" limit 100"
-        else:
-            return f""
-
-    def get_sql_statement(self):
-        sql = f"""
-            select {self.get_columns_source()}
-            from "{self.struct.source_schema}"."{self.struct.source_table}"
-            where 1=1
-            {self.get_incremental_statement()}
-            {self.get_order_by_sql_statement()}
-            {self.get_limit_sql_statement()}
-        """
-        return sql
-
-    def save_on_bucket(self, df, path_file, format="parquet"):
-        buffer = io.BytesIO()
-
-        if format == "csv":
-            df.to_csv(buffer, index=False)
-        else:
-            df.to_parquet(buffer, index=False, engine="pyarrow")
-        self.migrator_redshift_connector.s3_session.Object(
-            self.s3_credentials["bucket"],
-            path_file,
-        ).put(Body=buffer.getvalue())
-
-        buffer.close()
-
-    def save_data_to_s3(self, load_option=None):
-        self.load_option = load_option
-        self.migrator_redshift_connector.connect_s3()
-        self.migrator_redshift_connector.connect_source()
-
-        sql = self.get_sql_statement()
-
-        time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
-        idx = 1
-        path_file = None
-
-        for chunk_df in pd.read_sql(
-            sql,
-            self.migrator_redshift_connector.source_conn,
-            chunksize=self.struct.read_batch_size,
-        ):
-            if len(chunk_df) != 0:
-                path_file = f"raw/prefect/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/{idx}.parquet"
-                path_file_tmp = f"raw/tmp/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/{idx}.csv"
-
-                adapter = AdapterSourceTarget(self.struct)
-                chunk_df_s3 = chunk_df.copy()
-
-                chunk_df_s3 = adapter.transform_data(chunk_df_s3, target_save=S3)
-
-                self.save_on_bucket(chunk_df_s3, path_file)
-
-                chunk_df = adapter.convert_types(chunk_df)
-                chunk_df = adapter.transform_data(chunk_df)
-                chunk_df = adapter.equalize_number_columns(chunk_df)
-
-                self.save_on_bucket(chunk_df, path_file_tmp, format="csv")
-                idx = idx + 1
-
-        self.migrator_redshift_connector.close_source()
-
-        if path_file is None:
-            return None
-        else:
-            url = f's3://{self.s3_credentials["bucket"]}/raw/prefect/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/'
-
-            ExtractionOperation(
-                conn=self.migrator_redshift_connector.target_conn,
-            ).create(
-                struct=self.struct,
-                url=url,
-                load_option=self.load_option,
-                status=SAVED_S3,
-                platform=self.struct.extraction_engine,
-            )
-
-            return ExtractionOperationModel(
-                url=url,
-                load_option=self.load_option,
-            )
All remaining files listed above with +0 -0 are unchanged between 1.1.76 and 1.1.78.