maisaedu-poormans-dms 1.1.76.tar.gz → 1.1.78.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/PKG-INFO +1 -1
  2. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Connector.py +20 -5
  3. maisaedu-poormans-dms-1.1.78/maisaedu_poormans_dms/redshift_migration/Contracts/ReaderInterface.py +22 -0
  4. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/MigratorRedshift.py +4 -4
  5. maisaedu-poormans-dms-1.1.78/maisaedu_poormans_dms/redshift_migration/Reader/GenericReader.py +104 -0
  6. maisaedu-poormans-dms-1.1.78/maisaedu_poormans_dms/redshift_migration/Reader/PostgresReader.py +82 -0
  7. maisaedu-poormans-dms-1.1.78/maisaedu_poormans_dms/redshift_migration/Reader/__init__.py +9 -0
  8. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Types.py +2 -0
  9. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms.egg-info/PKG-INFO +1 -1
  10. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms.egg-info/SOURCES.txt +4 -1
  11. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/setup.py +1 -1
  12. maisaedu-poormans-dms-1.1.76/maisaedu_poormans_dms/redshift_migration/Reader.py +0 -166
  13. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/LICENSE +0 -0
  14. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/__init__.py +0 -0
  15. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorInterface.py +0 -0
  16. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorRowInterface.py +0 -0
  17. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorRowReaderInterface.py +0 -0
  18. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorRowWriterInterface.py +0 -0
  19. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorTableInterface.py +0 -0
  20. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/__init__.py +0 -0
  21. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Migrator.py +0 -0
  22. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/MigratorRow/MigratorRow.py +0 -0
  23. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/MigratorRow/Reader.py +0 -0
  24. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/MigratorRow/Writer.py +0 -0
  25. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/MigratorRow/__init__.py +0 -0
  26. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/MigratorTable.py +0 -0
  27. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/__init__.py +0 -0
  28. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Contracts/WriterInterface.py +0 -0
  29. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Contracts/__init__.py +0 -0
  30. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Logger.py +0 -0
  31. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Models/ExtractionOperation.py +0 -0
  32. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Models/Struct.py +0 -0
  33. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Models/__init__.py +0 -0
  34. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Services/AdapterSourceTarget.py +0 -0
  35. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Services/ExtractionOperation.py +0 -0
  36. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Services/RelationExtraction.py +0 -0
  37. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Services/Struct.py +0 -0
  38. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Services/__init__.py +0 -0
  39. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Writer/GenericWriter.py +0 -0
  40. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Writer/WriterCDC.py +0 -0
  41. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Writer/WriterNonCDC.py +0 -0
  42. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Writer/__init__.py +0 -0
  43. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/__init__.py +0 -0
  44. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/sql_server_migration.py +0 -0
  45. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms.egg-info/dependency_links.txt +0 -0
  46. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms.egg-info/requires.txt +0 -0
  47. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms.egg-info/top_level.txt +0 -0
  48. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/setup.cfg +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: maisaedu-poormans-dms
- Version: 1.1.76
+ Version: 1.1.78
  Summary: A library for making database migration tasks, for +A Education
  Home-page: UNKNOWN
  Author: A+ Educação
maisaedu_poormans_dms/redshift_migration/Connector.py
@@ -3,17 +3,28 @@ import psycopg2
  from sqlalchemy import create_engine
  
  from maisaedu_utilities_prefect.dw import get_red_credentials
- from .Types import DEV, LOCAL, get_iam_role
+ from .Types import DEV, LOCAL, POSTGRES, SQLSERVER, get_iam_role
  
  
  class Connector:
      def __init__(self, env, s3_credentials, source_credentials, target_credentials):
          self.source_credentials = source_credentials
+         self.__set_source_conn_type()
          self.target_credentials = target_credentials
          self.s3_credentials = s3_credentials
          self.env = env
          self.iam_role = get_iam_role(env)
  
+     def __set_source_conn_type(self):
+         if self.source_credentials is None:
+             self.source_conn_type = None
+         else:
+             if 'type' not in self.source_credentials:
+                 self.source_conn_type = POSTGRES
+             else:
+                 self.source_conn_type = self.source_credentials['type']
+ 
+ 
      def connect_target(self):
          if self.target_credentials is None:
              if self.env == LOCAL:
@@ -48,10 +59,14 @@ class Connector:
          self.s3_session = session.resource("s3")
  
      def connect_source(self):
-         engine = create_engine(
-             f"postgresql+psycopg2://{self.source_credentials['user']}:{self.source_credentials['password']}@{self.source_credentials['host']}:{self.source_credentials['port']}/{self.source_credentials['database']}"
-         )
-         self.source_conn = engine.connect().execution_options(stream_results=True)
+         if self.source_conn_type == POSTGRES:
+             engine = create_engine(
+                 f"postgresql+psycopg2://{self.source_credentials['user']}:{self.source_credentials['password']}@{self.source_credentials['host']}:{self.source_credentials['port']}/{self.source_credentials['database']}"
+             )
+             self.source_conn = engine.connect().execution_options(stream_results=True)
+         elif self.source_conn_type == SQLSERVER:
+             print("SQLSERVER")
+             # TODO
  
      def close_source(self):
          self.source_conn.close()
maisaedu_poormans_dms/redshift_migration/Contracts/ReaderInterface.py (new file)
@@ -0,0 +1,22 @@
+ from abc import ABC, abstractmethod
+ 
+ class ReaderInterface(ABC):
+     @abstractmethod
+     def get_incremental_statement(self) -> str:
+         pass
+ 
+     @abstractmethod
+     def get_columns_source(self) -> str:
+         pass
+ 
+     @abstractmethod
+     def get_order_by_sql_statement(self) -> str:
+         pass
+ 
+     @abstractmethod
+     def get_limit_sql_statement(self) -> str:
+         pass
+ 
+     @abstractmethod
+     def get_sql_statement(self) -> str:
+         pass
maisaedu_poormans_dms/redshift_migration/MigratorRedshift.py
@@ -1,7 +1,7 @@
  from .Connector import Connector
- from .Reader import Reader
+ from .Reader import factory as reader_factory
  from .Logger import Logger
- from .Writer import factory
+ from .Writer import factory as writer_factory
  from .Services.Struct import Struct
  from .Services.ExtractionOperation import ExtractionOperation
  from .Services.RelationExtraction import RelationExtraction
@@ -38,7 +38,7 @@ class MigratorRedshift:
  
          self.migrator_redshift_connector.connect_target()
  
-         self.migrator_redshift_reader = Reader(
+         self.migrator_redshift_reader = reader_factory(
              s3_credentials=s3_credentials,
              struct=struct,
              migrator_redshift_connector=self.migrator_redshift_connector,
@@ -150,7 +150,7 @@ class MigratorRedshift:
          self.__check_target_table_has_data()
          update_by_cdc = self.__check_table_will_be_updated_by_cdc(load_option)
  
-         self.migrator_redshift_writer = factory(
+         self.migrator_redshift_writer = writer_factory(
              env=self.env,
              update_by_cdc=update_by_cdc,
              struct=self.struct,
maisaedu_poormans_dms/redshift_migration/Reader/GenericReader.py (new file)
@@ -0,0 +1,104 @@
+ import io
+ import threading
+ import pandas as pd
+ from datetime import datetime
+ from ..Types import (
+     target_type_is_numeric,
+     LOCAL,
+     FULL,
+     PROD,
+     INCREMENTAL,
+     SAVED_S3,
+     PREFECT,
+     S3,
+ )
+ from ..Services.ExtractionOperation import ExtractionOperation
+ from ..Services.AdapterSourceTarget import AdapterSourceTarget
+ from ..Models.ExtractionOperation import ExtractionOperation as ExtractionOperationModel
+ 
+ 
+ class GenericReader:
+     def __init__(self, s3_credentials, struct, migrator_redshift_connector):
+         self.struct = struct
+         self.s3_credentials = s3_credentials
+         self.migrator_redshift_connector = migrator_redshift_connector
+ 
+     def __save_on_bucket(self, df, path_file, format="parquet"):
+         buffer = io.BytesIO()
+ 
+         if format == "csv":
+             df.to_csv(buffer, index=False)
+         else:
+             df.to_parquet(buffer, index=False, engine="pyarrow")
+         self.migrator_redshift_connector.s3_session.Object(
+             self.s3_credentials["bucket"],
+             path_file,
+         ).put(Body=buffer.getvalue())
+ 
+         buffer.close()
+ 
+     def __process_chunk(self, chunk_df, path_file, path_file_tmp):
+         adapter = AdapterSourceTarget(self.struct)
+         chunk_df_s3 = chunk_df.copy()
+ 
+         chunk_df_s3 = adapter.transform_data(chunk_df_s3, target_save=S3)
+ 
+         self.__save_on_bucket(chunk_df_s3, path_file)
+ 
+         chunk_df = adapter.convert_types(chunk_df)
+         chunk_df = adapter.transform_data(chunk_df)
+         chunk_df = adapter.equalize_number_columns(chunk_df)
+ 
+         self.__save_on_bucket(chunk_df, path_file_tmp, format="csv")
+ 
+     def save_data_to_s3(self, load_option=None):
+         self.load_option = load_option
+         self.migrator_redshift_connector.connect_s3()
+         self.migrator_redshift_connector.connect_source()
+ 
+         sql = self.get_sql_statement()
+ 
+         time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+         idx = 1
+         path_file = None
+         threads = []
+ 
+         for chunk_df in pd.read_sql(
+             sql,
+             self.migrator_redshift_connector.source_conn,
+             chunksize=self.struct.read_batch_size,
+         ):
+             if len(chunk_df) != 0:
+                 path_file = f"raw/prefect/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/{idx}.parquet"
+                 path_file_tmp = f"raw/tmp/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/{idx}.csv"
+ 
+                 thread = threading.Thread(target=self.__process_chunk, args=(chunk_df, path_file, path_file_tmp))
+                 thread.start()
+                 threads.append(thread)
+ 
+                 idx = idx + 1
+ 
+         for thread in threads:
+             thread.join()
+ 
+         self.migrator_redshift_connector.close_source()
+ 
+         if path_file is None:
+             return None
+         else:
+             url = f's3://{self.s3_credentials["bucket"]}/raw/prefect/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/'
+ 
+             ExtractionOperation(
+                 conn=self.migrator_redshift_connector.target_conn,
+             ).create(
+                 struct=self.struct,
+                 url=url,
+                 load_option=self.load_option,
+                 status=SAVED_S3,
+                 platform=self.struct.extraction_engine,
+             )
+ 
+             return ExtractionOperationModel(
+                 url=url,
+                 load_option=self.load_option,
+             )
maisaedu_poormans_dms/redshift_migration/Reader/PostgresReader.py (new file)
@@ -0,0 +1,82 @@
+ from ..Types import (
+     target_type_is_numeric,
+     LOCAL,
+     FULL,
+     INCREMENTAL,
+ )
+ from .GenericReader import GenericReader
+ from ..Contracts.ReaderInterface import ReaderInterface
+ 
+ class PostgresReader(GenericReader, ReaderInterface):
+     def get_incremental_statement(self):
+         if (
+             (
+                 self.struct.source_incremental_column is not None
+                 and self.struct.target_incremental_column is not None
+                 and (self.load_option is None)
+             ) or (self.load_option == INCREMENTAL)
+         ):
+             sql = f"""
+                 select max("{self.struct.target_incremental_column}") as max_value
+                 from "{self.struct.target_schema}"."{self.struct.target_table}"
+             """
+ 
+             cursor = self.migrator_redshift_connector.target_conn.cursor()
+ 
+             cursor.execute(sql)
+             result = cursor.fetchall()
+ 
+             if len(result) == 0 or result[0][0] is None:
+                 sql_return = ""
+                 self.load_option = FULL
+             else:
+                 for c in self.struct.columns:
+                     if c["target_name"] == self.struct.target_incremental_column:
+                         target_type = c["target_type"]
+ 
+                 if target_type_is_numeric(target_type):
+                     sql_return = f'and "{self.struct.source_incremental_column}" > {result[0][0]}'
+                 else:
+                     if (
+                         self.struct.incremental_interval_delta is None
+                         or self.struct.incremental_interval_delta == ""
+                     ):
+                         sql_return = f"and \"{self.struct.source_incremental_column}\" > '{result[0][0]}'"
+                     else:
+                         sql_return = f"and \"{self.struct.source_incremental_column}\" >= '{result[0][0]}'::timestamp - interval '{self.struct.incremental_interval_delta}'"
+ 
+                 self.load_option = INCREMENTAL
+ 
+             cursor.close()
+ 
+             return sql_return
+         else:
+             if (self.load_option is None):
+                 self.load_option = FULL
+             return ""
+ 
+     def get_columns_source(self):
+         return " * "
+ 
+     def get_order_by_sql_statement(self):
+         if self.struct.source_incremental_column is not None:
+             return f' order by "{self.struct.source_incremental_column}" asc'
+         else:
+             return ""
+ 
+     def get_limit_sql_statement(self):
+         if self.migrator_redshift_connector.env == LOCAL:
+             return f" limit 100"
+         else:
+             return f""
+ 
+     def get_sql_statement(self):
+         sql = f"""
+             select {self.get_columns_source()}
+             from "{self.struct.source_schema}"."{self.struct.source_table}"
+             where 1=1
+             {self.get_incremental_statement()}
+             {self.get_order_by_sql_statement()}
+             {self.get_limit_sql_statement()}
+         """
+         return sql
maisaedu_poormans_dms/redshift_migration/Reader/__init__.py (new file)
@@ -0,0 +1,9 @@
+ from .PostgresReader import PostgresReader
+ from ..Types import POSTGRES, SQLSERVER
+ 
+ 
+ def factory(s3_credentials, struct, migrator_redshift_connector):
+     if migrator_redshift_connector.source_conn_type == POSTGRES:
+         return PostgresReader(s3_credentials, struct, migrator_redshift_connector)
+     elif migrator_redshift_connector.source_conn_type == SQLSERVER:
+         return None
maisaedu_poormans_dms/redshift_migration/Types.py
@@ -16,6 +16,8 @@ SAVED_REDSHIFT = "saved-redshift"
  
  S3 = "s3"
  REDSHIFT = "redshift"
+ POSTGRES = "postgres"
+ SQLSERVER = "sqlserver"
  
  MAX_VARCHAR_LENGTH = 60000
  
maisaedu_poormans_dms.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: maisaedu-poormans-dms
- Version: 1.1.76
+ Version: 1.1.78
  Summary: A library for making database migration tasks, for +A Education
  Home-page: UNKNOWN
  Author: A+ Educação
maisaedu_poormans_dms.egg-info/SOURCES.txt
@@ -23,14 +23,17 @@ maisaedu_poormans_dms/postgres_migration/MigratorRow/__init__.py
  maisaedu_poormans_dms/redshift_migration/Connector.py
  maisaedu_poormans_dms/redshift_migration/Logger.py
  maisaedu_poormans_dms/redshift_migration/MigratorRedshift.py
- maisaedu_poormans_dms/redshift_migration/Reader.py
  maisaedu_poormans_dms/redshift_migration/Types.py
  maisaedu_poormans_dms/redshift_migration/__init__.py
+ maisaedu_poormans_dms/redshift_migration/Contracts/ReaderInterface.py
  maisaedu_poormans_dms/redshift_migration/Contracts/WriterInterface.py
  maisaedu_poormans_dms/redshift_migration/Contracts/__init__.py
  maisaedu_poormans_dms/redshift_migration/Models/ExtractionOperation.py
  maisaedu_poormans_dms/redshift_migration/Models/Struct.py
  maisaedu_poormans_dms/redshift_migration/Models/__init__.py
+ maisaedu_poormans_dms/redshift_migration/Reader/GenericReader.py
+ maisaedu_poormans_dms/redshift_migration/Reader/PostgresReader.py
+ maisaedu_poormans_dms/redshift_migration/Reader/__init__.py
  maisaedu_poormans_dms/redshift_migration/Services/AdapterSourceTarget.py
  maisaedu_poormans_dms/redshift_migration/Services/ExtractionOperation.py
  maisaedu_poormans_dms/redshift_migration/Services/RelationExtraction.py
setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
  
  setup(
      name="maisaedu-poormans-dms",
-     version="1.1.76",
+     version="1.1.78",
      description="A library for making database migration tasks, for +A Education",
      license="MIT License",
      author="A+ Educação",
maisaedu_poormans_dms/redshift_migration/Reader.py (deleted)
@@ -1,166 +0,0 @@
- import io
- import pandas as pd
- from datetime import datetime
- from .Types import (
-     target_type_is_numeric,
-     LOCAL,
-     FULL,
-     PROD,
-     INCREMENTAL,
-     SAVED_S3,
-     PREFECT,
-     S3,
- )
- from .Services.ExtractionOperation import ExtractionOperation
- from .Services.AdapterSourceTarget import AdapterSourceTarget
- from .Models.ExtractionOperation import ExtractionOperation as ExtractionOperationModel
- 
- 
- class Reader:
-     def __init__(self, s3_credentials, struct, migrator_redshift_connector):
-         self.struct = struct
-         self.s3_credentials = s3_credentials
-         self.migrator_redshift_connector = migrator_redshift_connector
- 
-     def get_incremental_statement(self):
-         if (
-             (
-                 self.struct.source_incremental_column is not None
-                 and self.struct.target_incremental_column is not None
-                 and (self.load_option is None)
-             ) or (self.load_option == INCREMENTAL)
-         ):
-             sql = f"""
-                 select max("{self.struct.target_incremental_column}") as max_value
-                 from "{self.struct.target_schema}"."{self.struct.target_table}"
-             """
- 
-             cursor = self.migrator_redshift_connector.target_conn.cursor()
- 
-             cursor.execute(sql)
-             result = cursor.fetchall()
- 
-             if len(result) == 0 or result[0][0] is None:
-                 sql_return = ""
-                 self.load_option = FULL
-             else:
-                 for c in self.struct.columns:
-                     if c["target_name"] == self.struct.target_incremental_column:
-                         target_type = c["target_type"]
- 
-                 if target_type_is_numeric(target_type):
-                     sql_return = f'and "{self.struct.source_incremental_column}" > {result[0][0]}'
-                 else:
-                     if (
-                         self.struct.incremental_interval_delta is None
-                         or self.struct.incremental_interval_delta == ""
-                     ):
-                         sql_return = f"and \"{self.struct.source_incremental_column}\" > '{result[0][0]}'"
-                     else:
-                         sql_return = f"and \"{self.struct.source_incremental_column}\" >= '{result[0][0]}'::timestamp - interval '{self.struct.incremental_interval_delta}'"
- 
-                 self.load_option = INCREMENTAL
- 
-             cursor.close()
- 
-             return sql_return
-         else:
-             if (self.load_option is None):
-                 self.load_option = FULL
-             return ""
- 
-     def get_columns_source(self):
-         return " * "
- 
-     def get_order_by_sql_statement(self):
-         if self.struct.source_incremental_column is not None:
-             return f' order by "{self.struct.source_incremental_column}" asc'
-         else:
-             return ""
- 
-     def get_limit_sql_statement(self):
-         if self.migrator_redshift_connector.env == LOCAL:
-             return f" limit 100"
-         else:
-             return f""
- 
-     def get_sql_statement(self):
-         sql = f"""
-             select {self.get_columns_source()}
-             from "{self.struct.source_schema}"."{self.struct.source_table}"
-             where 1=1
-             {self.get_incremental_statement()}
-             {self.get_order_by_sql_statement()}
-             {self.get_limit_sql_statement()}
-         """
-         return sql
- 
-     def save_on_bucket(self, df, path_file, format="parquet"):
-         buffer = io.BytesIO()
- 
-         if format == "csv":
-             df.to_csv(buffer, index=False)
-         else:
-             df.to_parquet(buffer, index=False, engine="pyarrow")
-         self.migrator_redshift_connector.s3_session.Object(
-             self.s3_credentials["bucket"],
-             path_file,
-         ).put(Body=buffer.getvalue())
- 
-         buffer.close()
- 
-     def save_data_to_s3(self, load_option=None):
-         self.load_option = load_option
-         self.migrator_redshift_connector.connect_s3()
-         self.migrator_redshift_connector.connect_source()
- 
-         sql = self.get_sql_statement()
- 
-         time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
-         idx = 1
-         path_file = None
- 
-         for chunk_df in pd.read_sql(
-             sql,
-             self.migrator_redshift_connector.source_conn,
-             chunksize=self.struct.read_batch_size,
-         ):
-             if len(chunk_df) != 0:
-                 path_file = f"raw/prefect/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/{idx}.parquet"
-                 path_file_tmp = f"raw/tmp/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/{idx}.csv"
- 
-                 adapter = AdapterSourceTarget(self.struct)
-                 chunk_df_s3 = chunk_df.copy()
- 
-                 chunk_df_s3 = adapter.transform_data(chunk_df_s3, target_save=S3)
- 
-                 self.save_on_bucket(chunk_df_s3, path_file)
- 
-                 chunk_df = adapter.convert_types(chunk_df)
-                 chunk_df = adapter.transform_data(chunk_df)
-                 chunk_df = adapter.equalize_number_columns(chunk_df)
- 
-                 self.save_on_bucket(chunk_df, path_file_tmp, format="csv")
-                 idx = idx + 1
- 
-         self.migrator_redshift_connector.close_source()
- 
-         if path_file is None:
-             return None
-         else:
-             url = f's3://{self.s3_credentials["bucket"]}/raw/prefect/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/'
- 
-             ExtractionOperation(
-                 conn=self.migrator_redshift_connector.target_conn,
-             ).create(
-                 struct=self.struct,
-                 url=url,
-                 load_option=self.load_option,
-                 status=SAVED_S3,
-                 platform=self.struct.extraction_engine,
-             )
- 
-             return ExtractionOperationModel(
-                 url=url,
-                 load_option=self.load_option,
-             )