maisaedu-poormans-dms 1.1.76.tar.gz → 1.1.78.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/PKG-INFO +1 -1
  2. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Connector.py +20 -5
  3. maisaedu-poormans-dms-1.1.78/maisaedu_poormans_dms/redshift_migration/Contracts/ReaderInterface.py +22 -0
  4. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/MigratorRedshift.py +4 -4
  5. maisaedu-poormans-dms-1.1.78/maisaedu_poormans_dms/redshift_migration/Reader/GenericReader.py +104 -0
  6. maisaedu-poormans-dms-1.1.78/maisaedu_poormans_dms/redshift_migration/Reader/PostgresReader.py +82 -0
  7. maisaedu-poormans-dms-1.1.78/maisaedu_poormans_dms/redshift_migration/Reader/__init__.py +9 -0
  8. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Types.py +2 -0
  9. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms.egg-info/PKG-INFO +1 -1
  10. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms.egg-info/SOURCES.txt +4 -1
  11. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/setup.py +1 -1
  12. maisaedu-poormans-dms-1.1.76/maisaedu_poormans_dms/redshift_migration/Reader.py +0 -166
  13. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/LICENSE +0 -0
  14. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/__init__.py +0 -0
  15. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorInterface.py +0 -0
  16. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorRowInterface.py +0 -0
  17. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorRowReaderInterface.py +0 -0
  18. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorRowWriterInterface.py +0 -0
  19. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/MigratorTableInterface.py +0 -0
  20. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Contracts/__init__.py +0 -0
  21. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/Migrator.py +0 -0
  22. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/MigratorRow/MigratorRow.py +0 -0
  23. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/MigratorRow/Reader.py +0 -0
  24. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/MigratorRow/Writer.py +0 -0
  25. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/MigratorRow/__init__.py +0 -0
  26. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/MigratorTable.py +0 -0
  27. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/postgres_migration/__init__.py +0 -0
  28. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Contracts/WriterInterface.py +0 -0
  29. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Contracts/__init__.py +0 -0
  30. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Logger.py +0 -0
  31. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Models/ExtractionOperation.py +0 -0
  32. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Models/Struct.py +0 -0
  33. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Models/__init__.py +0 -0
  34. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Services/AdapterSourceTarget.py +0 -0
  35. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Services/ExtractionOperation.py +0 -0
  36. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Services/RelationExtraction.py +0 -0
  37. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Services/Struct.py +0 -0
  38. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Services/__init__.py +0 -0
  39. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Writer/GenericWriter.py +0 -0
  40. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Writer/WriterCDC.py +0 -0
  41. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Writer/WriterNonCDC.py +0 -0
  42. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/Writer/__init__.py +0 -0
  43. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/redshift_migration/__init__.py +0 -0
  44. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms/sql_server_migration.py +0 -0
  45. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms.egg-info/dependency_links.txt +0 -0
  46. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms.egg-info/requires.txt +0 -0
  47. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/maisaedu_poormans_dms.egg-info/top_level.txt +0 -0
  48. {maisaedu-poormans-dms-1.1.76 → maisaedu-poormans-dms-1.1.78}/setup.cfg +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: maisaedu-poormans-dms
- Version: 1.1.76
+ Version: 1.1.78
  Summary: A library for making database migration tasks, for +A Education
  Home-page: UNKNOWN
  Author: A+ Educação
maisaedu_poormans_dms/redshift_migration/Connector.py
@@ -3,17 +3,28 @@ import psycopg2
  from sqlalchemy import create_engine
  
  from maisaedu_utilities_prefect.dw import get_red_credentials
- from .Types import DEV, LOCAL, get_iam_role
+ from .Types import DEV, LOCAL, POSTGRES, SQLSERVER, get_iam_role
  
  
  class Connector:
      def __init__(self, env, s3_credentials, source_credentials, target_credentials):
          self.source_credentials = source_credentials
+         self.__set_source_conn_type()
          self.target_credentials = target_credentials
          self.s3_credentials = s3_credentials
          self.env = env
          self.iam_role = get_iam_role(env)
  
+     def __set_source_conn_type(self):
+         if self.source_credentials is None:
+             self.source_conn_type = None
+         else:
+             if 'type' not in self.source_credentials:
+                 self.source_conn_type = POSTGRES
+             else:
+                 self.source_conn_type = self.source_credentials['type']
+ 
+ 
      def connect_target(self):
          if self.target_credentials is None:
              if self.env == LOCAL:
@@ -48,10 +59,14 @@ class Connector:
          self.s3_session = session.resource("s3")
  
      def connect_source(self):
-         engine = create_engine(
-             f"postgresql+psycopg2://{self.source_credentials['user']}:{self.source_credentials['password']}@{self.source_credentials['host']}:{self.source_credentials['port']}/{self.source_credentials['database']}"
-         )
-         self.source_conn = engine.connect().execution_options(stream_results=True)
+         if self.source_conn_type == POSTGRES:
+             engine = create_engine(
+                 f"postgresql+psycopg2://{self.source_credentials['user']}:{self.source_credentials['password']}@{self.source_credentials['host']}:{self.source_credentials['port']}/{self.source_credentials['database']}"
+             )
+             self.source_conn = engine.connect().execution_options(stream_results=True)
+         elif self.source_conn_type == SQLSERVER:
+             print("SQLSERVER")
+             # TODO
  
      def close_source(self):
          self.source_conn.close()
maisaedu_poormans_dms/redshift_migration/Contracts/ReaderInterface.py (new file)
@@ -0,0 +1,22 @@
+ from abc import ABC, abstractmethod
+ 
+ class ReaderInterface(ABC):
+     @abstractmethod
+     def get_incremental_statement(self) -> str:
+         pass
+ 
+     @abstractmethod
+     def get_columns_source(self) -> str:
+         pass
+ 
+     @abstractmethod
+     def get_order_by_sql_statement(self) -> str:
+         pass
+ 
+     @abstractmethod
+     def get_limit_sql_statement(self) -> str:
+         pass
+ 
+     @abstractmethod
+     def get_sql_statement(self) -> str:
+         pass
maisaedu_poormans_dms/redshift_migration/MigratorRedshift.py
@@ -1,7 +1,7 @@
  from .Connector import Connector
- from .Reader import Reader
+ from .Reader import factory as reader_factory
  from .Logger import Logger
- from .Writer import factory
+ from .Writer import factory as writer_factory
  from .Services.Struct import Struct
  from .Services.ExtractionOperation import ExtractionOperation
  from .Services.RelationExtraction import RelationExtraction
@@ -38,7 +38,7 @@ class MigratorRedshift:
  
          self.migrator_redshift_connector.connect_target()
  
-         self.migrator_redshift_reader = Reader(
+         self.migrator_redshift_reader = reader_factory(
              s3_credentials=s3_credentials,
              struct=struct,
              migrator_redshift_connector=self.migrator_redshift_connector,
@@ -150,7 +150,7 @@ class MigratorRedshift:
          self.__check_target_table_has_data()
          update_by_cdc = self.__check_table_will_be_updated_by_cdc(load_option)
  
-         self.migrator_redshift_writer = factory(
+         self.migrator_redshift_writer = writer_factory(
              env=self.env,
              update_by_cdc=update_by_cdc,
              struct=self.struct,
maisaedu_poormans_dms/redshift_migration/Reader/GenericReader.py (new file)
@@ -0,0 +1,104 @@
+ import io
+ import threading
+ import pandas as pd
+ from datetime import datetime
+ from ..Types import (
+     target_type_is_numeric,
+     LOCAL,
+     FULL,
+     PROD,
+     INCREMENTAL,
+     SAVED_S3,
+     PREFECT,
+     S3,
+ )
+ from ..Services.ExtractionOperation import ExtractionOperation
+ from ..Services.AdapterSourceTarget import AdapterSourceTarget
+ from ..Models.ExtractionOperation import ExtractionOperation as ExtractionOperationModel
+ 
+ 
+ class GenericReader:
+     def __init__(self, s3_credentials, struct, migrator_redshift_connector):
+         self.struct = struct
+         self.s3_credentials = s3_credentials
+         self.migrator_redshift_connector = migrator_redshift_connector
+ 
+     def __save_on_bucket(self, df, path_file, format="parquet"):
+         buffer = io.BytesIO()
+ 
+         if format == "csv":
+             df.to_csv(buffer, index=False)
+         else:
+             df.to_parquet(buffer, index=False, engine="pyarrow")
+         self.migrator_redshift_connector.s3_session.Object(
+             self.s3_credentials["bucket"],
+             path_file,
+         ).put(Body=buffer.getvalue())
+ 
+         buffer.close()
+ 
+     def __process_chunk(self, chunk_df, path_file, path_file_tmp):
+         adapter = AdapterSourceTarget(self.struct)
+         chunk_df_s3 = chunk_df.copy()
+ 
+         chunk_df_s3 = adapter.transform_data(chunk_df_s3, target_save=S3)
+ 
+         self.__save_on_bucket(chunk_df_s3, path_file)
+ 
+         chunk_df = adapter.convert_types(chunk_df)
+         chunk_df = adapter.transform_data(chunk_df)
+         chunk_df = adapter.equalize_number_columns(chunk_df)
+ 
+         self.__save_on_bucket(chunk_df, path_file_tmp, format="csv")
+ 
+     def save_data_to_s3(self, load_option=None):
+         self.load_option = load_option
+         self.migrator_redshift_connector.connect_s3()
+         self.migrator_redshift_connector.connect_source()
+ 
+         sql = self.get_sql_statement()
+ 
+         time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+         idx = 1
+         path_file = None
+         threads = []
+ 
+         for chunk_df in pd.read_sql(
+             sql,
+             self.migrator_redshift_connector.source_conn,
+             chunksize=self.struct.read_batch_size,
+         ):
+             if len(chunk_df) != 0:
+                 path_file = f"raw/prefect/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/{idx}.parquet"
+                 path_file_tmp = f"raw/tmp/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/{idx}.csv"
+ 
+                 thread = threading.Thread(target=self.__process_chunk, args=(chunk_df, path_file, path_file_tmp))
+                 thread.start()
+                 threads.append(thread)
+ 
+                 idx = idx + 1
+ 
+         for thread in threads:
+             thread.join()
+ 
+         self.migrator_redshift_connector.close_source()
+ 
+         if path_file is None:
+             return None
+         else:
+             url = f's3://{self.s3_credentials["bucket"]}/raw/prefect/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/'
+ 
+             ExtractionOperation(
+                 conn=self.migrator_redshift_connector.target_conn,
+             ).create(
+                 struct=self.struct,
+                 url=url,
+                 load_option=self.load_option,
+                 status=SAVED_S3,
+                 platform=self.struct.extraction_engine,
+             )
+ 
+             return ExtractionOperationModel(
+                 url=url,
+                 load_option=self.load_option,
+             )
maisaedu_poormans_dms/redshift_migration/Reader/PostgresReader.py (new file)
@@ -0,0 +1,82 @@
+ from ..Types import (
+     target_type_is_numeric,
+     LOCAL,
+     FULL,
+     INCREMENTAL,
+ )
+ from .GenericReader import GenericReader
+ from ..Contracts.ReaderInterface import ReaderInterface
+ 
+ class PostgresReader(GenericReader, ReaderInterface):
+     def get_incremental_statement(self):
+         if (
+             (
+                 self.struct.source_incremental_column is not None
+                 and self.struct.target_incremental_column is not None
+                 and (self.load_option is None)
+             ) or (self.load_option == INCREMENTAL)
+         ):
+             sql = f"""
+                 select max("{self.struct.target_incremental_column}") as max_value
+                 from "{self.struct.target_schema}"."{self.struct.target_table}"
+             """
+ 
+             cursor = self.migrator_redshift_connector.target_conn.cursor()
+ 
+             cursor.execute(sql)
+             result = cursor.fetchall()
+ 
+             if len(result) == 0 or result[0][0] is None:
+                 sql_return = ""
+                 self.load_option = FULL
+             else:
+                 for c in self.struct.columns:
+                     if c["target_name"] == self.struct.target_incremental_column:
+                         target_type = c["target_type"]
+ 
+                 if target_type_is_numeric(target_type):
+                     sql_return = f'and "{self.struct.source_incremental_column}" > {result[0][0]}'
+                 else:
+                     if (
+                         self.struct.incremental_interval_delta is None
+                         or self.struct.incremental_interval_delta == ""
+                     ):
+                         sql_return = f"and \"{self.struct.source_incremental_column}\" > '{result[0][0]}'"
+                     else:
+                         sql_return = f"and \"{self.struct.source_incremental_column}\" >= '{result[0][0]}'::timestamp - interval '{self.struct.incremental_interval_delta}'"
+ 
+                 self.load_option = INCREMENTAL
+ 
+             cursor.close()
+ 
+             return sql_return
+         else:
+             if (self.load_option is None):
+                 self.load_option = FULL
+             return ""
+ 
+     def get_columns_source(self):
+         return " * "
+ 
+     def get_order_by_sql_statement(self):
+         if self.struct.source_incremental_column is not None:
+             return f' order by "{self.struct.source_incremental_column}" asc'
+         else:
+             return ""
+ 
+     def get_limit_sql_statement(self):
+         if self.migrator_redshift_connector.env == LOCAL:
+             return f" limit 100"
+         else:
+             return f""
+ 
+     def get_sql_statement(self):
+         sql = f"""
+             select {self.get_columns_source()}
+             from "{self.struct.source_schema}"."{self.struct.source_table}"
+             where 1=1
+             {self.get_incremental_statement()}
+             {self.get_order_by_sql_statement()}
+             {self.get_limit_sql_statement()}
+         """
+         return sql
maisaedu_poormans_dms/redshift_migration/Reader/__init__.py (new file)
@@ -0,0 +1,9 @@
+ from .PostgresReader import PostgresReader
+ from ..Types import POSTGRES, SQLSERVER
+ 
+ 
+ def factory(s3_credentials, struct, migrator_redshift_connector):
+     if migrator_redshift_connector.source_conn_type == POSTGRES:
+         return PostgresReader(s3_credentials, struct, migrator_redshift_connector)
+     elif migrator_redshift_connector.source_conn_type == SQLSERVER:
+         return None
maisaedu_poormans_dms/redshift_migration/Types.py
@@ -16,6 +16,8 @@ SAVED_REDSHIFT = "saved-redshift"
  
  S3 = "s3"
  REDSHIFT = "redshift"
+ POSTGRES = "postgres"
+ SQLSERVER = "sqlserver"
  
  MAX_VARCHAR_LENGTH = 60000
  
maisaedu_poormans_dms.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: maisaedu-poormans-dms
- Version: 1.1.76
+ Version: 1.1.78
  Summary: A library for making database migration tasks, for +A Education
  Home-page: UNKNOWN
  Author: A+ Educação
maisaedu_poormans_dms.egg-info/SOURCES.txt
@@ -23,14 +23,17 @@ maisaedu_poormans_dms/postgres_migration/MigratorRow/__init__.py
  maisaedu_poormans_dms/redshift_migration/Connector.py
  maisaedu_poormans_dms/redshift_migration/Logger.py
  maisaedu_poormans_dms/redshift_migration/MigratorRedshift.py
- maisaedu_poormans_dms/redshift_migration/Reader.py
  maisaedu_poormans_dms/redshift_migration/Types.py
  maisaedu_poormans_dms/redshift_migration/__init__.py
+ maisaedu_poormans_dms/redshift_migration/Contracts/ReaderInterface.py
  maisaedu_poormans_dms/redshift_migration/Contracts/WriterInterface.py
  maisaedu_poormans_dms/redshift_migration/Contracts/__init__.py
  maisaedu_poormans_dms/redshift_migration/Models/ExtractionOperation.py
  maisaedu_poormans_dms/redshift_migration/Models/Struct.py
  maisaedu_poormans_dms/redshift_migration/Models/__init__.py
+ maisaedu_poormans_dms/redshift_migration/Reader/GenericReader.py
+ maisaedu_poormans_dms/redshift_migration/Reader/PostgresReader.py
+ maisaedu_poormans_dms/redshift_migration/Reader/__init__.py
  maisaedu_poormans_dms/redshift_migration/Services/AdapterSourceTarget.py
  maisaedu_poormans_dms/redshift_migration/Services/ExtractionOperation.py
  maisaedu_poormans_dms/redshift_migration/Services/RelationExtraction.py
setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
  
  setup(
      name="maisaedu-poormans-dms",
-     version="1.1.76",
+     version="1.1.78",
      description="A library for making database migration tasks, for +A Education",
      license="MIT License",
      author="A+ Educação",
maisaedu_poormans_dms/redshift_migration/Reader.py (deleted)
@@ -1,166 +0,0 @@
- import io
- import pandas as pd
- from datetime import datetime
- from .Types import (
-     target_type_is_numeric,
-     LOCAL,
-     FULL,
-     PROD,
-     INCREMENTAL,
-     SAVED_S3,
-     PREFECT,
-     S3,
- )
- from .Services.ExtractionOperation import ExtractionOperation
- from .Services.AdapterSourceTarget import AdapterSourceTarget
- from .Models.ExtractionOperation import ExtractionOperation as ExtractionOperationModel
- 
- 
- class Reader:
-     def __init__(self, s3_credentials, struct, migrator_redshift_connector):
-         self.struct = struct
-         self.s3_credentials = s3_credentials
-         self.migrator_redshift_connector = migrator_redshift_connector
- 
-     def get_incremental_statement(self):
-         if (
-             (
-                 self.struct.source_incremental_column is not None
-                 and self.struct.target_incremental_column is not None
-                 and (self.load_option is None)
-             ) or (self.load_option == INCREMENTAL)
-         ):
-             sql = f"""
-                 select max("{self.struct.target_incremental_column}") as max_value
-                 from "{self.struct.target_schema}"."{self.struct.target_table}"
-             """
- 
-             cursor = self.migrator_redshift_connector.target_conn.cursor()
- 
-             cursor.execute(sql)
-             result = cursor.fetchall()
- 
-             if len(result) == 0 or result[0][0] is None:
-                 sql_return = ""
-                 self.load_option = FULL
-             else:
-                 for c in self.struct.columns:
-                     if c["target_name"] == self.struct.target_incremental_column:
-                         target_type = c["target_type"]
- 
-                 if target_type_is_numeric(target_type):
-                     sql_return = f'and "{self.struct.source_incremental_column}" > {result[0][0]}'
-                 else:
-                     if (
-                         self.struct.incremental_interval_delta is None
-                         or self.struct.incremental_interval_delta == ""
-                     ):
-                         sql_return = f"and \"{self.struct.source_incremental_column}\" > '{result[0][0]}'"
-                     else:
-                         sql_return = f"and \"{self.struct.source_incremental_column}\" >= '{result[0][0]}'::timestamp - interval '{self.struct.incremental_interval_delta}'"
- 
-                 self.load_option = INCREMENTAL
- 
-             cursor.close()
- 
-             return sql_return
-         else:
-             if (self.load_option is None):
-                 self.load_option = FULL
-             return ""
- 
-     def get_columns_source(self):
-         return " * "
- 
-     def get_order_by_sql_statement(self):
-         if self.struct.source_incremental_column is not None:
-             return f' order by "{self.struct.source_incremental_column}" asc'
-         else:
-             return ""
- 
-     def get_limit_sql_statement(self):
-         if self.migrator_redshift_connector.env == LOCAL:
-             return f" limit 100"
-         else:
-             return f""
- 
-     def get_sql_statement(self):
-         sql = f"""
-             select {self.get_columns_source()}
-             from "{self.struct.source_schema}"."{self.struct.source_table}"
-             where 1=1
-             {self.get_incremental_statement()}
-             {self.get_order_by_sql_statement()}
-             {self.get_limit_sql_statement()}
-         """
-         return sql
- 
-     def save_on_bucket(self, df, path_file, format="parquet"):
-         buffer = io.BytesIO()
- 
-         if format == "csv":
-             df.to_csv(buffer, index=False)
-         else:
-             df.to_parquet(buffer, index=False, engine="pyarrow")
-         self.migrator_redshift_connector.s3_session.Object(
-             self.s3_credentials["bucket"],
-             path_file,
-         ).put(Body=buffer.getvalue())
- 
-         buffer.close()
- 
-     def save_data_to_s3(self, load_option=None):
-         self.load_option = load_option
-         self.migrator_redshift_connector.connect_s3()
-         self.migrator_redshift_connector.connect_source()
- 
-         sql = self.get_sql_statement()
- 
-         time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
-         idx = 1
-         path_file = None
- 
-         for chunk_df in pd.read_sql(
-             sql,
-             self.migrator_redshift_connector.source_conn,
-             chunksize=self.struct.read_batch_size,
-         ):
-             if len(chunk_df) != 0:
-                 path_file = f"raw/prefect/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/{idx}.parquet"
-                 path_file_tmp = f"raw/tmp/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/{idx}.csv"
- 
-                 adapter = AdapterSourceTarget(self.struct)
-                 chunk_df_s3 = chunk_df.copy()
- 
-                 chunk_df_s3 = adapter.transform_data(chunk_df_s3, target_save=S3)
- 
-                 self.save_on_bucket(chunk_df_s3, path_file)
- 
-                 chunk_df = adapter.convert_types(chunk_df)
-                 chunk_df = adapter.transform_data(chunk_df)
-                 chunk_df = adapter.equalize_number_columns(chunk_df)
- 
-                 self.save_on_bucket(chunk_df, path_file_tmp, format="csv")
-                 idx = idx + 1
- 
-         self.migrator_redshift_connector.close_source()
- 
-         if path_file is None:
-             return None
-         else:
-             url = f's3://{self.s3_credentials["bucket"]}/raw/prefect/{self.migrator_redshift_connector.env}/{self.struct.target_schema}/{self.struct.target_table}/{time}/'
- 
-             ExtractionOperation(
-                 conn=self.migrator_redshift_connector.target_conn,
-             ).create(
-                 struct=self.struct,
-                 url=url,
-                 load_option=self.load_option,
-                 status=SAVED_S3,
-                 platform=self.struct.extraction_engine,
-             )
- 
-             return ExtractionOperationModel(
-                 url=url,
-                 load_option=self.load_option,
-             )