maisaedu-poormans-dms 1.1.79__tar.gz → 1.1.81__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/PKG-INFO +1 -1
- maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_dynamo_migration/Connector.py +30 -0
- maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_dynamo_migration/DynamoReplicator.py +63 -0
- maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_dynamo_migration/Writer/WriterCDC.py +102 -0
- maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_dynamo_migration/Writer/WriterFull.py +62 -0
- maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_dynamo_migration/models/Struct.py +13 -0
- maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_dynamo_migration/services/Struct.py +76 -0
- maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_migration/Contracts/__init__.py +0 -0
- maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_migration/Models/__init__.py +0 -0
- maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_migration/Services/__init__.py +0 -0
- maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_migration/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms.egg-info/PKG-INFO +1 -1
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms.egg-info/SOURCES.txt +10 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/setup.py +1 -1
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/LICENSE +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorInterface.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorRowInterface.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorRowReaderInterface.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorRowWriterInterface.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorTableInterface.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/postgres_migration/Contracts/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/postgres_migration/Migrator.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/postgres_migration/MigratorRow/MigratorRow.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/postgres_migration/MigratorRow/Reader.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/postgres_migration/MigratorRow/Writer.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/postgres_migration/MigratorRow/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/postgres_migration/MigratorTable.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/postgres_migration/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.79/maisaedu_poormans_dms/redshift_migration/Contracts → maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_dynamo_migration/Writer}/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.79/maisaedu_poormans_dms/redshift_migration/Models → maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_dynamo_migration}/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.79/maisaedu_poormans_dms/redshift_migration/Services → maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_dynamo_migration/models}/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.79/maisaedu_poormans_dms/redshift_migration → maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_dynamo_migration/services}/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Connector.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Contracts/ReaderInterface.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Contracts/WriterInterface.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Logger.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/MigratorRedshift.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Models/ExtractionOperation.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Models/Struct.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Reader/GenericReader.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Reader/PostgresReader.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Reader/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Services/AdapterSourceTarget.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Services/ExtractionOperation.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Services/RelationExtraction.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Services/Struct.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Types.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Writer/GenericWriter.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Writer/WriterCDC.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Writer/WriterNonCDC.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/redshift_migration/Writer/__init__.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/sql_server_migration.py +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms.egg-info/dependency_links.txt +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms.egg-info/requires.txt +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms.egg-info/top_level.txt +0 -0
- {maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/setup.cfg +0 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import psycopg2
|
|
2
|
+
|
|
3
|
+
from maisaedu_utilities_prefect.dw import get_red_credentials
|
|
4
|
+
from maisaedu_utilities_prefect.constants.redshift import get_iam_role
|
|
5
|
+
|
|
6
|
+
class Connector:
    """Owns the psycopg2 connection to Redshift for one environment.

    Attributes:
        env: Environment identifier handed to the credential helpers.
        iam_role: Whatever ``get_iam_role(env)`` returns for this environment.
        redshift: The open psycopg2 connection, or ``None`` when disconnected.
        has_connected: ``True`` while ``redshift`` holds an open connection.
    """

    def __init__(self, env):
        self.env = env
        self.iam_role = get_iam_role(env)
        self.redshift = None

        self.has_connected = False

    def connect(self):
        """Open a Redshift connection with the credentials for ``self.env``."""
        red_credentials = get_red_credentials(self.env)
        self.redshift = psycopg2.connect(
            host=red_credentials["host"],
            database=red_credentials["database"],
            user=red_credentials["user"],
            password=red_credentials["password"],
            port=red_credentials["port"],
        )

        self.has_connected = True

    def close(self):
        """Close the connection, if any, and reset the connector state.

        Callers decide whether to reconnect by testing ``redshift is None``,
        so the handle has to be cleared here; otherwise a closed connection
        would be reused after ``close()``. Calling this while disconnected is
        a no-op.
        """
        if not self.has_connected:
            return

        try:
            self.redshift.close()
        finally:
            # Reset even if close() raises: the handle is unusable either way.
            self.redshift = None
            self.has_connected = False
|
maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_dynamo_migration/DynamoReplicator.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
from maisaedu_poormans_dms.redshift_migration.Types import (
|
|
2
|
+
FULL,
|
|
3
|
+
check_if_env_is_valid,
|
|
4
|
+
check_if_option_is_valid
|
|
5
|
+
)
|
|
6
|
+
|
|
7
|
+
from .Writer.WriterFull import WriterFull
|
|
8
|
+
from .Writer.WriterCDC import WriterCDC
|
|
9
|
+
from .Connector import Connector
|
|
10
|
+
|
|
11
|
+
class DynamoReplicator:
    """Runs a full or incremental load of one struct into Redshift.

    The replicator owns a single ``Connector`` that is opened lazily on the
    first ``process_records`` call and closed when the object is destroyed.
    """

    def __init__(
        self,
        env=None,
        struct=None,
        logger=None
    ):
        """Validate ``env`` and prepare an unopened connector.

        Args:
            env: Environment identifier; checked by ``check_if_env_is_valid``.
            struct: Stored on the instance; ``process_records`` takes its own
                ``struct`` argument and does not read this one.
            logger: Object exposing ``info``; may be ``None`` to disable
                logging.
        """
        check_if_env_is_valid(env)

        self.conn = Connector(env)
        self.env = env
        self.struct = struct
        self.logger = logger

    def __connect(self):
        """Open the Redshift connection unless one is already held."""
        if self.conn.redshift is None:
            self.conn.connect()

    def __run_full_flow(self, struct):
        """Delegate a full load of ``struct`` to ``WriterFull``."""
        writer = WriterFull(
            env=self.env,
            conn=self.conn,
            logger=self.logger,
            struct=struct
        )
        writer.save()

    def __run_incremental_flow(self, struct):
        """Delegate an incremental load of ``struct`` to ``WriterCDC``."""
        writer = WriterCDC(
            env=self.env,
            conn=self.conn,
            logger=self.logger,
            struct=struct
        )
        writer.save()

    def process_records(self, load_option, struct):
        """Load ``struct`` into Redshift and commit.

        Args:
            load_option: ``FULL`` selects the full flow; any other value
                accepted by ``check_if_option_is_valid`` selects the
                incremental flow.
            struct: Description of the table to replicate.

        Raises:
            ValueError: If ``struct`` is ``None``.
            Exception: Whatever the option check or the writer raises; the
                open transaction is rolled back before it propagates.
        """
        if struct is None:
            raise ValueError("struct must be provided")

        # Validate first so an invalid option never costs a connection.
        check_if_option_is_valid(load_option)
        self.__connect()

        if self.logger is not None:
            self.logger.info(f"Processing records for {struct}")

        try:
            if load_option == FULL:
                self.__run_full_flow(struct)
            else:
                self.__run_incremental_flow(struct)

            self.conn.redshift.commit()
        except Exception:
            # The connection is shared across calls; without a rollback the
            # next call would run inside the aborted transaction.
            try:
                self.conn.redshift.rollback()
            except Exception as rollback_error:
                # The original failure is the one the caller needs to see.
                if self.logger is not None:
                    self.logger.info(f"Rollback failed: {rollback_error}")
            raise

    def __del__(self):
        # __init__ can fail before self.conn exists (invalid env).
        conn = getattr(self, "conn", None)
        if conn is not None:
            conn.close()
|
maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_dynamo_migration/Writer/WriterCDC.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
from ..models.Struct import Struct
|
|
2
|
+
|
|
3
|
+
class WriterCDC:
    """Applies DynamoDB stream events from a materialized view to a target.

    ``save`` stages the latest event per key in a temp table, then deletes,
    inserts and updates rows of ``target_schema.target_table`` accordingly.
    """

    def __init__(self, env, conn, logger, struct: "Struct"):
        """Store collaborators and derive the target and view names.

        Args:
            env: Environment identifier (stored, not read by this class).
            conn: Connector whose ``redshift`` attribute is an open connection.
            logger: Logger object (stored, not read by this class).
            struct: Replication description providing schema/table names and
                ``incremental_interval_delta``.
        """
        self.env = env
        self.conn = conn
        self.logger = logger
        self.struct = struct
        self.cursor = None  # set for the duration of save()

        self.target = f"{self.struct.target_schema}.{self.struct.target_table}"
        self.view_name = self._get_view_name()

    def _get_view_name(self):
        """Return ``<source_schema>.<database>_<table>_stream``.

        NOTE(review): ``replace`` strips every ``dynamo_`` occurrence, not
        only a leading prefix — confirm no table name contains it elsewhere.
        """
        table_name = self.struct.target_table.replace("dynamo_", "")
        return f"{self.struct.source_schema}.{self.struct.database}_{table_name}_stream"

    def create_temp_table(self):
        """Create the ``tmp_cdc`` staging table and refresh the source view."""
        self.cursor.execute(f"""
            CREATE TEMP TABLE tmp_cdc (
                event_name varchar(255),
                partition_key varchar(2048),
                sort_key varchar(2048),
                item super,
                updated_at timestamp
            );
            REFRESH MATERIALIZED VIEW {self.view_name};
        """)

    def insert_data_to_temp(self):
        """Stage the most recent event per (partitionKey, sortKey).

        Only events newer than the target's ``max(updated_at)`` minus
        ``incremental_interval_delta`` seconds are considered; when the target
        has no ``updated_at`` the lower bound falls back to epoch 0.
        The ``/ 1e6`` presumably converts microseconds to seconds — TODO
        confirm against the stream's timestamp precision.
        """
        self.cursor.execute(f"""
            INSERT INTO tmp_cdc
            SELECT
                "payload"."eventName"::varchar AS event_name,
                "payload"."dynamodb"."Keys"."partitionKey"."S"::varchar AS partition_key,
                "payload"."dynamodb"."Keys"."sortKey"."S"::varchar AS sort_key,
                "payload"."dynamodb"."NewImage" AS item,
                TIMESTAMP 'epoch' + ("payload"."dynamodb"."ApproximateCreationDateTime"::bigint / 1e6) * INTERVAL '1 second' AS updated_at
            FROM (
                SELECT *,
                    ROW_NUMBER() OVER (PARTITION BY "payload"."dynamodb"."Keys"."partitionKey"."S", "payload"."dynamodb"."Keys"."sortKey"."S" ORDER BY "payload"."dynamodb"."ApproximateCreationDateTime" DESC) as row_num
                FROM {self.view_name}
                WHERE
                    (("payload"."dynamodb"."ApproximateCreationDateTime"::bigint) / 1e6) > (coalesce(extract('epoch' from (select max(updated_at) from {self.target})), 0) - {self.struct.incremental_interval_delta})
            ) subquery
            WHERE row_num = 1;
        """)

    def delete_records(self):
        """Delete target rows whose latest staged event is ``REMOVE``."""
        self.cursor.execute(f"""
            CREATE TEMP TABLE tmp_delete_ids (partition_key varchar(2048), sort_key varchar(2048));
            INSERT INTO tmp_delete_ids
            SELECT partition_key, sort_key
            FROM tmp_cdc
            WHERE event_name = 'REMOVE';

            DELETE FROM {self.target}
            USING tmp_delete_ids tmp
            WHERE
                {self.target}.partition_key = tmp.partition_key
                AND {self.target}.sort_key = tmp.sort_key
        """)

    def insert_records(self):
        """Insert staged ``INSERT``/``MODIFY`` rows missing from the target.

        Absence is detected through ``t.id IS NULL`` after the left join;
        this assumes the target has a never-null ``id`` column — TODO confirm.
        """
        self.cursor.execute(f"""
            INSERT INTO {self.target} (partition_key, sort_key, item, updated_at)
            SELECT
                tmp.partition_key,
                tmp.sort_key,
                tmp.item,
                tmp.updated_at
            FROM tmp_cdc tmp
            LEFT JOIN {self.target} t ON tmp.partition_key = t.partition_key AND tmp.sort_key = t.sort_key
            WHERE tmp.event_name IN ('INSERT', 'MODIFY') AND t.id IS NULL
        """)

    def update_records(self):
        """Overwrite existing target rows with the staged item.

        ``INSERT`` events are applied as well as ``MODIFY``: staging keeps
        only the latest event per key, so a REMOVE followed by a re-INSERT
        arrives as a lone ``INSERT`` for a row that still exists in the
        target. ``insert_records`` skips it because the row is present, so it
        must be refreshed here or the old item would survive.
        """
        self.cursor.execute(f"""
            UPDATE {self.target} t
            SET
                item = tmp.item,
                updated_at = tmp.updated_at
            FROM tmp_cdc tmp
            WHERE t.partition_key = tmp.partition_key AND t.sort_key = tmp.sort_key
                AND tmp.event_name IN ('INSERT', 'MODIFY');
        """)

    def cleanup(self):
        """Drop both temp tables created during the run."""
        self.cursor.execute("""
            DROP TABLE tmp_delete_ids;
            DROP TABLE tmp_cdc;
        """)

    def save(self):
        """Run the whole CDC sequence on one cursor; the caller commits."""
        with self.conn.redshift.cursor() as cursor:
            self.cursor = cursor

            self.create_temp_table()
            self.insert_data_to_temp()
            self.delete_records()
            self.insert_records()
            self.update_records()
            self.cleanup()
|
maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_dynamo_migration/Writer/WriterFull.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from ..models.Struct import Struct
|
|
2
|
+
|
|
3
|
+
TEMP_TABLE_NAME = 'tmp_full'
MAX_ERRORS = 100

# Role previously hard-coded in the COPY statement; now used only when the
# connector does not provide one.
_DEFAULT_IAM_ROLE = 'arn:aws:iam::977647303146:role/PrefectAssumeRoleInsideRedshiftDEV'


class WriterFull:
    """Replaces the target table's contents with a full export read from S3.

    ``save`` loads the export into a temp table, truncates the target and
    re-inserts every item with keys extracted from the item itself.
    """

    def __init__(self, env, conn, logger, struct: "Struct"):
        """Store collaborators and derive the qualified target name.

        Args:
            env: Environment identifier (stored, not read by this class).
            conn: Connector exposing an open ``redshift`` connection and,
                when available, ``iam_role``.
            logger: Logger object (stored, not read by this class).
            struct: Replication description providing ``s3_url`` and the
                target schema/table.
        """
        self.env = env
        self.conn = conn
        self.logger = logger
        self.struct = struct
        self.cursor = None  # set for the duration of save()

        self.target = f"{self.struct.target_schema}.{self.struct.target_table}"

    def create_temp_table(self):
        """Create the staging table that receives the raw export items."""
        self.cursor.execute(f"""
            CREATE TEMP TABLE {TEMP_TABLE_NAME} (
                id bigint identity(1,1) primary key,
                item super
            );
        """)

    def copy_data_to_target(self):
        """COPY the gzipped JSON export from S3 into the staging table.

        Despite the name, the destination is the temp table, not the target.
        The IAM role comes from the connector so each environment uses its
        own role; presumably ``conn.iam_role`` is a role ARN usable by COPY —
        TODO confirm against ``get_iam_role``.
        """
        iam_role = getattr(self.conn, "iam_role", None) or _DEFAULT_IAM_ROLE
        self.cursor.execute(
            f"""
            COPY {TEMP_TABLE_NAME}
            FROM '{self.struct.s3_url}'
            IAM_ROLE '{iam_role}'
            GZIP JSON 'auto ignorecase'
            MAXERROR {MAX_ERRORS};
            """
        )

    def truncate_target_table(self):
        """Empty the target table before it is reloaded.

        NOTE(review): Redshift documents TRUNCATE as committing the current
        transaction, so a failure after this point could not be rolled back
        to the previous contents — confirm this is acceptable.
        """
        self.cursor.execute(f"TRUNCATE TABLE {self.target}")

    def insert_data_from_temp_to_target(self):
        """Copy staged items into the target with ``updated_at`` left NULL."""
        self.cursor.execute(
            f"""
            INSERT INTO {self.target} (partition_key, sort_key, item, updated_at)
            SELECT
                item."partitionKey"."S"::varchar AS partition_key,
                item."sortKey"."S"::varchar AS sort_key,
                item,
                NULL
            FROM
                {TEMP_TABLE_NAME};
            """
        )

    def cleanup(self):
        """Drop the staging table."""
        self.cursor.execute(f"DROP TABLE {TEMP_TABLE_NAME}")

    def save(self):
        """Run the full-load sequence on one cursor; the caller commits."""
        with self.conn.redshift.cursor() as cursor:
            self.cursor = cursor

            self.create_temp_table()
            self.copy_data_to_target()
            self.truncate_target_table()
            self.insert_data_from_temp_to_target()
            self.cleanup()
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
@dataclass
class Struct:
    """One DynamoDB table to replicate and where its data lands.

    Attributes:
        s3_url: S3 URL for the full load.
        database: Database name, used to build the table names in the queries.
        source_schema: Schema where the materialized view lives.
        source_table: DynamoDB table name.
        target_schema: Schema part of the qualified target table name.
        target_table: Table part of the qualified target table name.
        incremental_interval_delta: Interval in seconds to consider for
            incremental loads.
    """

    # Source side
    s3_url: str
    database: str
    source_schema: str
    source_table: str

    # Target side
    target_schema: str
    target_table: str

    # Incremental tuning
    incremental_interval_delta: int
|
maisaedu-poormans-dms-1.1.81/maisaedu_poormans_dms/redshift_dynamo_migration/services/Struct.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import boto3
|
|
2
|
+
from ..models.Struct import Struct as StructModel
|
|
3
|
+
from ..Connector import Connector
|
|
4
|
+
|
|
5
|
+
SOURCE_ENGINE = "dynamo"
AWS_REGION = "us-east-1"
DEFAULT_INCREMENTAL_INTERVAL_DELTA = 3600


class Struct:
    """Builds ``StructModel`` objects from ``dataeng.relations_extraction``.

    Each active dynamo relation is paired with the S3 location of its most
    recent full export.
    """

    def __init__(self, env=None, aws_credentials=None):
        """Open the Redshift connection and create the S3 client.

        Args:
            env: Environment identifier passed to ``Connector``.
            aws_credentials: Mapping holding the
                ``aws_edtech_prefect_deploy_access_key_id`` and
                ``aws_edtech_prefect_deploy_secret_access_key`` entries.
        """
        self.conn = Connector(env)
        self.conn.connect()

        self.s3_client = boto3.client(
            's3',
            aws_access_key_id=aws_credentials["aws_edtech_prefect_deploy_access_key_id"],
            aws_secret_access_key=aws_credentials["aws_edtech_prefect_deploy_secret_access_key"],
            region_name=AWS_REGION
        )

    def get(self, database, bucket, tables="all"):
        """Return one ``StructModel`` per active relation of ``database``.

        Args:
            database: Value matched against the ``database`` column.
            bucket: S3 bucket holding the exports under
                ``raw/dynamo/full/<source_table>/AWSDynamoDB/``.
            tables: ``"all"``, a single target table name, or an iterable of
                target table names to restrict the result to.

        Returns:
            A list of ``StructModel``; empty when ``tables`` is an empty
            iterable.

        Raises:
            ValueError: If a relation has no export folder in S3.
        """
        params = [SOURCE_ENGINE, database]
        clause = ""
        if tables != "all":
            # A bare string would otherwise be iterated character by character.
            wanted = (tables,) if isinstance(tables, str) else tuple(tables)
            if not wanted:
                # "in ()" is not valid SQL; nothing was asked for.
                return []
            # psycopg2 renders a tuple parameter as a parenthesised list.
            clause = "and target_table in %s"
            params.append(wanted)

        # Values are bound by the driver instead of being spliced into the
        # SQL text, so quotes in the arguments cannot alter the query.
        query = f"""
            select
                database,
                source_schema,
                source_table,
                target_schema,
                target_table,
                incremental_interval_delta
            from
                dataeng.relations_extraction
            where
                source_engine = %s
                and database = %s
                and is_active is true
                {clause}
        """

        with self.conn.redshift.cursor() as cursor:
            cursor.execute(query, params)
            relations_extraction = cursor.fetchall()

        base_url = f"s3://{bucket}"
        structs = []

        for r in relations_extraction:
            prefix = f"raw/dynamo/full/{r[2]}/AWSDynamoDB/"
            s3_objects = self.s3_client.list_objects_v2(Bucket=bucket, Prefix=prefix, Delimiter="/")

            if "CommonPrefixes" not in s3_objects:
                raise ValueError(f"No data found in {prefix}")

            # Pick the lexicographically greatest folder; presumably export
            # folder names sort chronologically — TODO confirm.
            latest_folder = max(s3_objects["CommonPrefixes"], key=lambda x: x["Prefix"])
            s3_url = latest_folder["Prefix"]

            structs.append(
                StructModel(
                    database=r[0],
                    source_schema=r[1],
                    source_table=r[2],
                    target_schema=r[3],
                    target_table=r[4],
                    incremental_interval_delta=r[5] or DEFAULT_INCREMENTAL_INTERVAL_DELTA,
                    s3_url=f"{base_url}/{s3_url}data"
                )
            )

        return structs

    def __del__(self):
        # __init__ can fail before self.conn exists.
        conn = getattr(self, "conn", None)
        if conn is not None:
            conn.close()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -20,6 +20,16 @@ maisaedu_poormans_dms/postgres_migration/MigratorRow/MigratorRow.py
|
|
|
20
20
|
maisaedu_poormans_dms/postgres_migration/MigratorRow/Reader.py
|
|
21
21
|
maisaedu_poormans_dms/postgres_migration/MigratorRow/Writer.py
|
|
22
22
|
maisaedu_poormans_dms/postgres_migration/MigratorRow/__init__.py
|
|
23
|
+
maisaedu_poormans_dms/redshift_dynamo_migration/Connector.py
|
|
24
|
+
maisaedu_poormans_dms/redshift_dynamo_migration/DynamoReplicator.py
|
|
25
|
+
maisaedu_poormans_dms/redshift_dynamo_migration/__init__.py
|
|
26
|
+
maisaedu_poormans_dms/redshift_dynamo_migration/Writer/WriterCDC.py
|
|
27
|
+
maisaedu_poormans_dms/redshift_dynamo_migration/Writer/WriterFull.py
|
|
28
|
+
maisaedu_poormans_dms/redshift_dynamo_migration/Writer/__init__.py
|
|
29
|
+
maisaedu_poormans_dms/redshift_dynamo_migration/models/Struct.py
|
|
30
|
+
maisaedu_poormans_dms/redshift_dynamo_migration/models/__init__.py
|
|
31
|
+
maisaedu_poormans_dms/redshift_dynamo_migration/services/Struct.py
|
|
32
|
+
maisaedu_poormans_dms/redshift_dynamo_migration/services/__init__.py
|
|
23
33
|
maisaedu_poormans_dms/redshift_migration/Connector.py
|
|
24
34
|
maisaedu_poormans_dms/redshift_migration/Logger.py
|
|
25
35
|
maisaedu_poormans_dms/redshift_migration/MigratorRedshift.py
|
|
File without changes
|
{maisaedu-poormans-dms-1.1.79 → maisaedu-poormans-dms-1.1.81}/maisaedu_poormans_dms/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|