dbhydra 2.1.2__tar.gz → 2.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {dbhydra-2.1.2 → dbhydra-2.2.0}/PKG-INFO +1 -1
  2. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/abstract_db.py +1 -0
  3. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/abstract_table.py +6 -5
  4. dbhydra-2.2.0/dbhydra/src/migrator.py +395 -0
  5. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/mysql_db.py +4 -0
  6. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/tables.py +43 -35
  7. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra.egg-info/PKG-INFO +1 -1
  8. {dbhydra-2.1.2 → dbhydra-2.2.0}/setup.py +1 -1
  9. dbhydra-2.1.2/dbhydra/src/migrator.py +0 -114
  10. {dbhydra-2.1.2 → dbhydra-2.2.0}/LICENSE +0 -0
  11. {dbhydra-2.1.2 → dbhydra-2.2.0}/README.md +0 -0
  12. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/__init__.py +0 -0
  13. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/dbhydra_core.py +0 -0
  14. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/__init__.py +0 -0
  15. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/bigquery_db.py +0 -0
  16. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/errors/__init__.py +0 -0
  17. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/errors/exceptions.py +0 -0
  18. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/mongo_db.py +0 -0
  19. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/postgres_db.py +0 -0
  20. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/sqlserver_db.py +0 -0
  21. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/xlsx_db.py +0 -0
  22. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/tests/__init__.py +0 -0
  23. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/tests/test_cases.py +0 -0
  24. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/tests/test_mongo.py +0 -0
  25. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/tests/test_sql.py +0 -0
  26. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra.egg-info/SOURCES.txt +0 -0
  27. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra.egg-info/dependency_links.txt +0 -0
  28. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra.egg-info/requires.txt +0 -0
  29. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra.egg-info/top_level.txt +0 -0
  30. {dbhydra-2.1.2 → dbhydra-2.2.0}/setup.cfg +0 -0
--- dbhydra-2.1.2/PKG-INFO
+++ dbhydra-2.2.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dbhydra
-Version: 2.1.2
+Version: 2.2.0
 Summary: Data science friendly ORM combining Python
 Home-page: https://github.com/DovaX/dbhydra
 Author: DovaX
--- dbhydra-2.1.2/dbhydra/src/abstract_db.py
+++ dbhydra-2.2.0/dbhydra/src/abstract_db.py
@@ -105,6 +105,7 @@ class AbstractDb(abc.ABC):
 
         self.active_transactions=[]
         self.last_table_inserted_into: Optional[str] = None
+        self.identifier_quote = ''
 
     @abc.abstractmethod
     def connect_locally(self):
--- dbhydra-2.1.2/dbhydra/src/abstract_table.py
+++ dbhydra-2.2.0/dbhydra/src/abstract_table.py
@@ -132,12 +132,13 @@ class AbstractSelectable:
 
 
     def select_all(self):
+        quote = self.db1.identifier_quote
         all_cols_query = ""
         for col in self.columns:
-            all_cols_query = all_cols_query + col + ","
+            all_cols_query = all_cols_query + quote + col + quote + ","
         if all_cols_query[-1] == ",":
            all_cols_query = all_cols_query[:-1]
-        list1 = self.select(f"SELECT {all_cols_query} FROM " + self.name)
+        list1 = self.select(f"SELECT {all_cols_query} FROM {quote}{self.name}{quote};")
         return (list1)
 
     def select_to_df(self):
@@ -244,7 +245,6 @@ class AbstractTable(AbstractJoinable, abc.ABC):
             self.column_type_dict={self.columns[i]:self.types[i] for i,x in enumerate(self.columns)}
         else:
             self.column_type_dict={}
-
 
     # Temporary disabled, please make sure this is implemented where needed, don't introduce breaking changes please
     # @abc.abstractmethod
@@ -421,11 +421,12 @@ class AbstractTable(AbstractJoinable, abc.ABC):
 
 
     def delete(self, where=None):
+        quote = self.db1.identifier_quote
 
         if where is None:
-            query = "DELETE FROM " + self.name
+            query = f"DELETE FROM {quote}{self.name}{quote}"
         else:
-            query = "DELETE FROM " + self.name + " WHERE " + where
+            query = f"DELETE FROM {quote}{self.name}{quote} WHERE {where}"
         return self.execute(query)
 
 
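Taken together with the new `identifier_quote` attribute on AbstractDb (default `''`, i.e. no quoting), these hunks route identifier escaping in the shared query builders through the active dialect. A minimal sketch of the resulting strings, using a hypothetical stand-in object rather than a live connection:

    # StubDb is illustrative only; the real attribute comes from AbstractDb
    # and is overridden per dialect (MysqlDb sets it to a backtick later in this diff).
    class StubDb:
        identifier_quote = '`'

    quote = StubDb.identifier_quote
    name = "user data"  # a table name with a space, previously a syntax error
    print(f"DELETE FROM {quote}{name}{quote} WHERE id = 5")
    # DELETE FROM `user data` WHERE id = 5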
--- /dev/null
+++ dbhydra-2.2.0/dbhydra/src/migrator.py
@@ -0,0 +1,395 @@
+import os
+import math
+import json
+import pandas as pd
+
+from typing import Optional
+from deepdiff import DeepDiff
+from dataclasses import dataclass, asdict
+
+CURRENT_MIGRATION_DEFAULT_PATH = "./db/migrations/current_migration.json"
+MIGRATION_HISTORY_DEFAULT_PATH = "./db/migrations/migration_history.json"
+
+@dataclass
+class Migration:
+    forward: list[dict]
+    backward: list[dict]
+
+class Migrator:
+    """
+    A class for managing database migrations.
+
+    This class provides functionality to create, manage, and execute database migrations
+    using a migration system compatible with MySQL and Postgres dialects. It allows for
+    creating forward and backward migrations, reading and writing migrations to JSON files,
+    and executing migrations based on changes detected in database structures.
+
+    Note: This class is compatible with MySQL and Postgres dialects and has been somewhat tested
+    with those databases. It may require adjustments for other database systems.
+
+    Attributes:
+        db: The database connection object used for executing migrations.
+    """
+
+    def __init__(self, db):
+        self.db = db
+
+        # Used in older implementations, TODO: decide whether to keep both approaches, unify them or pick one
+        self._migration_number = 1
+        self._migration_list = []
+
+        # Used in newer approach
+        self._current_migration = Migration(forward=[], backward=[])
+
+    def process_migration_dict(self, migration_dict):
+        matching_table_class = self.db.matching_table_class  # E.g. MysqlTable
+
+        assert len(migration_dict.keys()) == 1
+        operation = list(migration_dict.keys())[0]
+        options = migration_dict[operation]
+        if operation == "create":
+            table = matching_table_class(self.db, options["table_name"], options["columns"], options["types"])
+            table.convert_types_from_mysql()
+            table.create()
+        elif operation == "drop":
+            table = matching_table_class(self.db, options["table_name"])
+            table.drop()
+        elif operation == "add_column":
+            table = matching_table_class(self.db, options["table_name"])
+            table.initialize_columns()
+            table.initialize_types()
+            table.convert_types_from_mysql()
+            table.add_column(options["column_name"], options["column_type"])
+        elif operation == "modify_column":
+            table = matching_table_class(self.db, options["table_name"])
+            table.initialize_columns()
+            table.initialize_types()
+            table.convert_types_from_mysql()
+            table.modify_column(options["column_name"], options["column_type"])
+        elif operation == "drop_column":
+            table = matching_table_class(self.db, options["table_name"])
+            table.initialize_columns()
+            table.initialize_types()
+            table.drop_column(options["column_name"])
+
+    # Old approach methods START
+    def next_migration(self):
+        self._migration_number += 1
+        self._migration_list = []
+
+    def migrate(self, migration_list):
+        for i, migration_dict in enumerate(migration_list):
+            self.process_migration_dict(migration_dict)
+
+    def migrate_from_json(self, filename):
+        with open(filename, "r") as f:
+            rows = f.readlines()[0].replace("\n", "")
+            result = json.loads(rows)
+            for dict in result:
+                self.process_migration_dict(dict)
+            return (result)
+
+    def migration_list_to_json(self, filename=None):
+        result = json.dumps(self._migration_list)
+
+        if filename is None or filename == "" or filename.isspace():
+            with open("migrations/migration-" + str(self._migration_number) + ".json", "w+") as f:
+                f.write(result)
+        else:
+            with open(f"migrations/{filename}.json", "w+") as f:
+                f.write(result)
+
+    def create_migrations_from_df(self, name, dataframe):
+
+        columns, return_types = self.extract_columns_and_types_from_df(dataframe)
+
+        migration_dict = {"create": {"table_name": name, "columns": columns, "types": return_types}}
+        self._migration_list.append(migration_dict)
+        self.migration_list_to_json()
+        # return columns, return_types
+
+    def extract_columns_and_types_from_df(self, dataframe):
+        columns = list(dataframe.columns)
+
+        return_types = []
+
+        if columns == []:
+            return ["id"], ["int"]
+
+        for column in dataframe:
+            if dataframe.empty:
+                return_types.append(type(None).__name__)
+                continue
+
+            t = dataframe.loc[0, column]
+            try:
+                if pd.isna(t):
+                    return_types.append(type(None).__name__)
+                else:
+                    try:
+                        return_types.append(type(t.item()).__name__)
+                    except:
+                        return_types.append(type(t).__name__)
+            except:
+                # length = 2**( int(dataframe[col].str.len().max()) - 1).bit_length()
+                length = int(dataframe[column].str.len().max())
+                length += 0.1 * length
+                length = int(math.ceil(length / 10.0)) * 10
+                return_types.append(f'nvarchar({length})' if type(t).__name__ == 'str' else type(t).__name__)
+
+        if (columns[0] != "id"):
+            columns.insert(0, "id")
+            return_types.insert(0, "int")
+
+        return columns, return_types
+    # Old approach methods END
+
+    def set_current_migration(self, migration_dict: dict[str, list]):
+        self._current_migration = Migration(**migration_dict)
+
+    def migrate_forward(self):
+        """
+        Applies forward migrations from the current migration object.
+
+        Iterates through each migration dictionary in the current migration's forward list,
+        processes the migration, saves it to migration history, and clears the current migration.
+
+        Returns:
+            None
+        """
+
+        for migration_dict in self._current_migration.forward:
+            self.process_migration_dict(migration_dict)
+
+        self._save_migration_to_history(migration=self._current_migration)
+        self._clear_current_migration()
+
+    def migrate_backward(self):
+        """
+        Applies backward migrations from the current migration object.
+
+        Iterates through each migration dictionary in the current migration's backward list,
+        processes the migration, saves it to migration history, and clears the current migration.
+
+        Returns:
+            None
+        """
+
+        for migration_dict in self._current_migration.backward:
+            self.process_migration_dict(migration_dict)
+
+        history_migration = Migration(forward=self._current_migration.backward, backward=self._current_migration.forward)
+        self._save_migration_to_history(migration=history_migration)
+        self._clear_current_migration()
+
+    def migrate_n_steps_back_in_history(self, n: int, migration_history_json: str = MIGRATION_HISTORY_DEFAULT_PATH):
+        migration_history = self._read_migration_history_json(migration_history_json)
+
+        if len(migration_history) < n:
+            raise ValueError(f"Provided n (= {n}) is larger than migration history length (= {len(migration_history)}).")
+
+        total_backward_migration = Migration(forward=[], backward=[])
+        migrations = migration_history[-n:]  # Take last n elements of migration history for execution
+
+        # Loop in reversed order as we execute backward migrations in reversed order compared to forward ones
+        for migration_dict in reversed(migrations):
+            total_backward_migration.forward.extend(migration_dict["forward"])
+            total_backward_migration.backward.extend(migration_dict["backward"])
+
+        self.set_current_migration(asdict(total_backward_migration))
+        self.migrate_backward()
+
+    def load_migration_from_json(self, json_file_path: str = CURRENT_MIGRATION_DEFAULT_PATH):
+        with open(json_file_path, "r") as file:
+            migration_dict = json.load(file)
+
+        self.set_current_migration(migration_dict)
+
+    def save_current_migration_to_json(self, file_path: str = CURRENT_MIGRATION_DEFAULT_PATH):
+        if not file_path.endswith(".json"):
+            raise ValueError("Current migration file must be of '.json' type.")
+
+        self._build_folder_structure_for_file_path(file_path)
+
+        with open(file_path, "w+") as file:
+            json.dump(asdict(self._current_migration), file, indent=2)
+
+    def create_table_migration(self, table_name: str, old_structure: Optional[dict], new_structure: Optional[dict]):
+        """
+        Creates a migration for a database table based on its old and new structures.
+
+        Args:
+            table_name (str): The name of the database table.
+            old_structure (Optional[dict]): The old structure of the table.
+            new_structure (Optional[dict]): The new structure of the table.
+
+        If old_structure is None and new_structure is not None: CREATE table
+        If old_structure is not None and new_structure is None: DROP table
+
+        Returns:
+            Migration: The generated migration object.
+
+        Raises:
+            ValueError: If the table_name argument is empty.
+        """
+
+        if not table_name:
+            raise ValueError("The 'table_name' argument must be a non-empty string.")
+
+        if not old_structure and new_structure:
+            # empty initial structure --> non-empty new structure ==> create the table
+            columns, types = list(new_structure.keys()), list(new_structure.values())
+            forward_migration = [{"create": {"table_name": table_name, "columns": columns, "types": types}}]
+            backward_migration = [{"drop": {"table_name": table_name}}]
+
+            migration = Migration(forward=forward_migration, backward=backward_migration)
+        elif not new_structure:
+            # new structure is empty ==> drop the table
+            columns, types = list(old_structure.keys()), list(old_structure.values())
+            forward_migration = [{"drop": {"table_name": table_name}}]
+            backward_migration = [{"create": {"table_name": table_name, "columns": columns, "types": types}}]
+
+            migration = Migration(forward=forward_migration, backward=backward_migration)
+        else:
+            diff = DeepDiff(old_structure, new_structure, verbose_level=2)
+            migration = self._convert_deepdiff_dict_into_migration(table_name, diff)
+
+        self._merge_migration_to_current_migration(migration=migration)
+
+        return migration
+
+    def _convert_deepdiff_dict_into_migration(self, table_name: str, deepdiff_dict: dict) -> Migration:
+        """
+        Converts deepdiff dictionary from the new and old table structures comparison into a Migration object.
+
+        Args:
+            table_name (str): A name of the examined DB table.
+            deepdiff_dict (dict): A dictionary from DeepDiff comparison of the old and new table structure.
+
+        Returns:
+            Migration: A Migration object containing forward and backward migrations for the given table.
+
+        Example:
+            >>> table_name = 'results'
+            >>> deepdiff_dict = {'dictionary_item_removed': {"root['hehexd']": 'double'}}
+            >>> migrator = Migrator(db)
+            >>> asdict(migrator._convert_deepdiff_dict_into_migration(table_name, deepdiff_dict))
+            {
+                'forward': [
+                    {'drop_column': {'table_name': 'results', 'column_name': 'hehexd'}}
+                ],
+                'backward': [
+                    {'add_column': {'table_name': 'results', 'column_name': 'hehexd', 'column_type': 'double'}}
+                ]
+            }
+        """
+        forward_migration, backward_migration = [], []
+
+        forward_conversions = {
+            "dictionary_item_added": "add_column",
+            "dictionary_item_removed": "drop_column",
+            "values_changed": "modify_column"
+        }
+        backward_conversions = {
+            "dictionary_item_added": "drop_column",
+            "dictionary_item_removed": "add_column",
+            "values_changed": "modify_column"
+        }
+
+        for action_name, deepdiff_action in deepdiff_dict.items():
+            for deepdiff_key in deepdiff_action.keys():
+                column_name = self._extract_column_name_from_deepdiff_key(deepdiff_key)
+                forward_action, backward_action = forward_conversions[action_name], backward_conversions[action_name]
+
+                if action_name == "dictionary_item_added":
+                    column_type = deepdiff_action[deepdiff_key]
+                    forward_migration.append({forward_action: {"table_name": table_name, "column_name": column_name, "column_type": column_type}})
+                    backward_migration.append({backward_action: {"table_name": table_name, "column_name": column_name}})
+                elif action_name == "dictionary_item_removed":
+                    column_type = deepdiff_action[deepdiff_key]
+                    forward_migration.append({forward_action: {"table_name": table_name, "column_name": column_name}})
+                    backward_migration.append({backward_action: {"table_name": table_name, "column_name": column_name, "column_type": column_type}})
+                elif action_name == "values_changed":
+                    column_type = deepdiff_action[deepdiff_key]["old_value"]
+                    column_new_type = deepdiff_action[deepdiff_key]["new_value"]
+
+                    # HACK: Do not create migrations for cases such as varchar(2047) --> nvarchar(2047)
+                    is_varchar_in_types = "varchar" in column_type and "varchar" in column_new_type
+                    is_max_length_equal = (
+                        column_type[column_type.index("("): column_type.index(")")]
+                        == column_new_type[column_new_type.index("("): column_new_type.index(")")]
+                    ) if is_varchar_in_types else False
+                    is_varchar_nvarchar_conversion = is_varchar_in_types and is_max_length_equal
+
+                    if not is_varchar_nvarchar_conversion:
+                        forward_migration.append({forward_action: {"table_name": table_name, "column_name": column_name,
+                                                                   "column_type": column_new_type}})
+                        backward_migration.append({backward_action: {"table_name": table_name, "column_name": column_name,
+                                                                     "column_type": column_type}})
+
+        return Migration(forward=forward_migration, backward=backward_migration)
+
+    def _extract_column_name_from_deepdiff_key(self, deepdiff_key: str) -> str:
+        """
+        Extracts the column name from a key generated by deepdiff.
+
+        Args:
+            deepdiff_key (str): The key generated by deepdiff.
+
+        Returns:
+            str: The extracted column name.
+
+        Example:
+            >>> migrator = Migrator(db)
+            >>> column_name = migrator._extract_column_name_from_deepdiff_key("root['table']['column']")
+            >>> print(column_name)
+            column
+        """
+
+        # Split the item_key by '[' and ']' to isolate the column name
+        # The column name is expected to be the last element after splitting
+        column_name = deepdiff_key.split('[')[-1].strip("']")
+        return column_name
+
+    def _merge_migration_to_current_migration(self, migration: Migration):
+        new_forward_part = self._current_migration.forward + migration.forward
+        new_backward_part = self._current_migration.backward + migration.backward
+        self._current_migration = Migration(forward=new_forward_part, backward=new_backward_part)
+
+    def _clear_current_migration(self):
+        self._current_migration = Migration(forward=[], backward=[])
+
+    def _read_migration_history_json(self, file_path: str = MIGRATION_HISTORY_DEFAULT_PATH):
+        if not file_path.endswith(".json"):
+            raise ValueError("Migration history file must be of '.json' type.")
+
+        if not os.path.exists(file_path):
+            raise FileNotFoundError(f"Migration history file '{file_path}' does not exist.")
+
+        try:
+            with open(file_path, "r") as file:
+                migration_history = json.load(file)
+        except json.JSONDecodeError:
+            migration_history = []
+
+        return migration_history
+
+    def _save_migration_to_history(self, migration: Migration, file_path: str = MIGRATION_HISTORY_DEFAULT_PATH):
+        try:
+            migration_history = self._read_migration_history_json(file_path)
+        except FileNotFoundError:
+            self._build_folder_structure_for_file_path(file_path)
+            migration_history = []
+
+        migration_history.append(asdict(migration))
+
+        with open(file_path, "w") as file:
+            json.dump(migration_history, file, indent=2)
+
+    def _build_folder_structure_for_file_path(self, file_path: str):
+        folder_path = os.path.dirname(file_path)
+        if not os.path.exists(folder_path):
+            print(f"Folder path to the file '{file_path}' does not exist. Creating the file and the folder structure.")
+            os.makedirs(folder_path)
+
+
+
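The new migrator.py above replaces the much smaller module deleted near the end of this diff. A rough usage sketch of the newer API, assuming `db` is an already-connected dbhydra database object (e.g. a MysqlDb) and that the module is importable as suggested by the file layout:

    from dbhydra.src.migrator import Migrator

    migrator = Migrator(db)  # db: a connected MysqlDb/PostgresDb instance (assumed)

    old_structure = {"id": "int", "name": "varchar(100)"}
    new_structure = {"id": "int", "name": "varchar(100)", "score": "double"}

    # DeepDiff of the two structures yields a forward add_column step for `score`
    # and a matching backward drop_column step, merged into the current migration.
    migrator.create_table_migration("results", old_structure, new_structure)

    migrator.save_current_migration_to_json()  # ./db/migrations/current_migration.json
    migrator.migrate_forward()                 # applies forward steps, appends to history

    # Undo the most recent entry recorded in migration_history.json:
    migrator.migrate_n_steps_back_in_history(1)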
--- dbhydra-2.1.2/dbhydra/src/mysql_db.py
+++ dbhydra-2.2.0/dbhydra/src/mysql_db.py
@@ -22,6 +22,10 @@ class MysqlDb(AbstractDb):
        'Jsonable': "json"
    }
 
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.identifier_quote = '`'
+
     def connect_locally(self):
         self.connection = pymysql.connect(host=self.DB_SERVER, user=self.DB_USERNAME, password=self.DB_PASSWORD,
                                           database=self.DB_DATABASE)
--- dbhydra-2.1.2/dbhydra/src/tables.py
+++ dbhydra-2.2.0/dbhydra/src/tables.py
@@ -2,7 +2,7 @@ import pandas as pd
 import numpy as np
 from typing import Optional, Any
 import abc
-
+import time
 #xlsx imports
 import pathlib
 
@@ -655,7 +655,7 @@ class MysqlTable(AbstractTable):
     """
     def get_data_types_and_character_lengths(self):
         information_schema_table = MysqlTable(self.db1, 'INFORMATION_SCHEMA.COLUMNS', ['DATA_TYPE'], ['nvarchar(50)'])
-        query = f"SELECT DATA_TYPE,character_maximum_length FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{self.db1.DB_DATABASE}' AND TABLE_NAME = '" + self.name + "'"
+        query = f"SELECT DATA_TYPE,character_maximum_length FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{self.db1.DB_DATABASE}' AND TABLE_NAME = '{self.name}'"
         types = information_schema_table.select(query)
         data_types = [x[0] for x in types]
         data_lengths = [x[1] for x in types]
@@ -725,12 +725,12 @@ class MysqlTable(AbstractTable):
        Returns the number of records in table
        """
 
-        num_of_records = self.select(f"SELECT COUNT(*) FROM {self.name};")
+        num_of_records = self.select(f"SELECT COUNT(*) FROM `{self.name}`;")
 
         return num_of_records[0][0]
 
     def drop(self):
-        query = "DROP TABLE " + self.name + ";"
+        query = "DROP TABLE `" + self.name + "`;"
         print(query)
         self.db1.execute(query)
 
@@ -742,9 +742,9 @@ class MysqlTable(AbstractTable):
 
         column_type_pairs = list(zip(self.columns, self.types))[1:]
         fields = ", ".join(
-            [f"{column} {type_.upper()}" for column, type_ in column_type_pairs]
+            [f"`{column}` {type_.upper()}" for column, type_ in column_type_pairs]
         )
-        query = f"CREATE TABLE {self.name} ({self.id_column_name} INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, {fields})"
+        query = f"CREATE TABLE `{self.name}` ({self.id_column_name} INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, {fields})"
 
         print(query)
         try:
@@ -759,14 +759,11 @@ class MysqlTable(AbstractTable):
         total_output=[]
         for k in range(len(rows)):
             if k % batch == 0:
-                query = "INSERT INTO " + self.name + " ("
+                query = "INSERT INTO `" + self.name + "` ("
                 for i in range(start_index, len(self.columns)):
                     if i < len(rows[k]) + 1:
-                        # column name containing space needs to be wrapped in `...`, otherwise causes syntax error
-                        if " " in self.columns[i]:
-                            column_name = '`' + self.columns[i] + '`'
-                        else:
-                            column_name = self.columns[i]
+                        # column name containing space/reserved keyword needs to be wrapped in `...`, otherwise causes syntax error
+                        column_name = '`' + self.columns[i] + '`'
                         query += column_name + ","
                     if len(rows) < len(self.columns):
                         print(len(self.columns) - len(rows), "columns were not specified")
@@ -846,17 +843,17 @@ class MysqlTable(AbstractTable):
     def add_foreign_key(self, foreign_key):
         parent_id = foreign_key['parent_id']
         parent = foreign_key['parent']
-        query = "ALTER TABLE " + self.name + " MODIFY " + parent_id + " INT UNSIGNED"
+        query = "ALTER TABLE `" + self.name + "` MODIFY " + parent_id + " INT UNSIGNED"
         print(query)
         self.db1.execute(query)
-        query = "ALTER TABLE " + self.name + " ADD FOREIGN KEY (" + parent_id + ") REFERENCES " + parent + "("+self.id_column_name+")"
+        query = "ALTER TABLE `" + self.name + "` ADD FOREIGN KEY (" + parent_id + ") REFERENCES " + parent + "("+self.id_column_name+")"
         print(query)
         self.db1.execute(query)
 
     @save_migration
     def add_column(self, column_name, column_type):
         assert len(column_name) > 1
-        command = "ALTER TABLE " + self.name + " ADD COLUMN " + column_name + " " + column_type
+        command = "ALTER TABLE `" + self.name + "` ADD COLUMN `" + column_name + "` " + column_type
         try:
             self.db1.execute(command)
             self.columns.append(column_name)
@@ -867,7 +864,7 @@ class MysqlTable(AbstractTable):
     @save_migration
     def drop_column(self, column_name):
         assert len(column_name) > 1
-        command = "ALTER TABLE " + self.name + " DROP COLUMN " + column_name
+        command = "ALTER TABLE `" + self.name + "` DROP COLUMN " + column_name
         try:
             print(command)
             self.db1.execute(command)
@@ -881,7 +878,7 @@ class MysqlTable(AbstractTable):
     @save_migration
     def modify_column(self, column_name, new_column_type):
         assert len(column_name) > 1
-        command = "ALTER TABLE " + self.name + " MODIFY COLUMN " + column_name + " " + new_column_type
+        command = "ALTER TABLE `" + self.name + "` MODIFY COLUMN `" + column_name + "` " + new_column_type
         print(command)
         try:
             self.db1.execute(command)
@@ -894,9 +891,10 @@ class MysqlTable(AbstractTable):
 ############### XLSX ##################
 
 class XlsxTable(AbstractTable):
-    def __init__(self, db1, name, columns=None, types=None, id_column_name = "id"):
+    def __init__(self, db1, name, columns=None, types=None, id_column_name = "id", number_of_retries=5):
         super().__init__(db1, name, columns, types)
         self.id_column_name = id_column_name
+        self.NUMBER_OF_RETRIES = number_of_retries
 
         table_filename = f"{self.name}.csv" if self.db1.is_csv else f"{self.name}.xlsx"
         self.table_directory_path: pathlib.Path = self.db1.db_directory_path / table_filename
@@ -960,23 +958,33 @@ class XlsxTable(AbstractTable):
             column for column, type_ in self.column_type_dict.items() if type_ == "datetime"
         ]
 
-        try:
-            if self.db1.is_csv:
-                df = pd.read_csv(
-                    self.table_directory_path, dtype=column_type_map,
-                    parse_dates=date_columns, encoding='utf-8'
-                )
-            else:
-                df = pd.read_excel(
-                    self.table_directory_path, dtype=column_type_map,
-                    parse_dates=date_columns
-                )
-
-            df.replace({np.nan: None}, inplace=True)
-        except Exception as e:
-            print(f"Error while reading data into XlsxTable: {e}")
-            df = pd.DataFrame(columns=self.columns)
+        # BUG: If XlsxTable is being accessed by multiple threads, read operation
+        # might fail due to race conditions. Add retry mechanism to handle these cases.
+        for attempt in range(self.NUMBER_OF_RETRIES):
+            try:
+                df = self._select(column_type_map, date_columns)
+            except Exception:
+                # print(f"Error while reading data into XlsxTable: {e}")
+                # df = pd.DataFrame(columns=self.columns)
+                if attempt < self.NUMBER_OF_RETRIES - 1:
+                    time.sleep(0.1)
+                    continue
+                else:
+                    print(f"Failed to read data from {self.table_directory_path}, returning empty DataFrame")
+                    df = pd.DataFrame(columns=self.columns)
+            return df
 
+    def _select(self, column_type_map, date_columns):
+        if self.db1.is_csv:
+            df = pd.read_csv(
+                self.table_directory_path, dtype=column_type_map, parse_dates=date_columns,
+                encoding='utf-8'
+            )
+        else:
+            df = pd.read_excel(
+                self.table_directory_path, dtype=column_type_map, parse_dates=date_columns
+            )
+        df.replace({np.nan: None}, inplace=True)
         return df
 
     def insert_from_df(self, df, batch=1, try_mode=False, debug_mode=False, adjust_df=False, insert_id=False):
@@ -1026,7 +1034,7 @@ class XlsxTable(AbstractTable):
         df.reset_index(drop=True, inplace=True)
 
         self._save_table(df)
-        self.last_table_inserted_into = self.name
+        self.db1.last_table_inserted_into = self.name
 
     def replace_from_df(self, df):
         assert len(df.columns) == len(self.columns)  # +1 because of id column
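The retry count introduced in the XlsxTable hunks above is configurable per table. A hedged sketch, assuming `db` is an xlsx/csv-backed dbhydra database object and that reads go through the retried path (e.g. via the inherited select_to_df):

    # Up to 10 read attempts, 0.1 s apart, before falling back to an empty
    # DataFrame; useful when several threads share the same backing file.
    table = XlsxTable(db, "users", columns=["id", "name"],
                      types=["int", "nvarchar(100)"], number_of_retries=10)
    df = table.select_to_df()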
--- dbhydra-2.1.2/dbhydra.egg-info/PKG-INFO
+++ dbhydra-2.2.0/dbhydra.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dbhydra
-Version: 2.1.2
+Version: 2.2.0
 Summary: Data science friendly ORM combining Python
 Home-page: https://github.com/DovaX/dbhydra
 Author: DovaX
--- dbhydra-2.1.2/setup.py
+++ dbhydra-2.2.0/setup.py
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
 
 setuptools.setup(
     name='dbhydra',
-    version='2.1.2',
+    version='2.2.0',
     author='DovaX',
     author_email='dovax.ai@gmail.com',
     description='Data science friendly ORM combining Python',
--- dbhydra-2.1.2/dbhydra/src/migrator.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import pandas as pd
-import math
-import json
-
-
-class Migrator:
-    """It was somewhat tested only for MySQL and Postgres dialect"""
-
-    def __init__(self, db=None):
-        self.db = db
-        self.migration_number = 1
-        self.migration_list = []
-
-    def process_migration_dict(self, migration_dict):
-        matching_table_class = self.db.matching_table_class  # E.g. MysqlTable
-
-        assert len(migration_dict.keys()) == 1
-        operation = list(migration_dict.keys())[0]
-        options = migration_dict[operation]
-        if operation == "create":
-            table = matching_table_class(self.db, options["table_name"], options["columns"], options["types"])
-            table.convert_types_from_mysql()
-            table.create()
-        elif operation == "drop":
-            table = matching_table_class(self.db, options["table_name"])
-            table.drop()
-        elif operation == "add_column":
-            table = matching_table_class(self.db, options["table_name"])
-            table.initialize_columns()
-            table.initialize_types()
-            table.convert_types_from_mysql()
-            table.add_column(options["column_name"], options["column_type"])
-        elif operation == "modify_column":
-            table = matching_table_class(self.db, options["table_name"])
-            table.initialize_columns()
-            table.initialize_types()
-            table.convert_types_from_mysql()
-            table.modify_column(options["column_name"], options["column_type"])
-        elif operation == "drop_column":
-            table = matching_table_class(self.db, options["table_name"])
-            table.initialize_columns()
-            table.initialize_types()
-            table.drop_column(options["column_name"])
-
-    def next_migration(self):
-        self.migration_number += 1
-        self.migration_list = []
-
-    def migrate(self, migration_list):
-        for i, migration_dict in enumerate(migration_list):
-            self.process_migration_dict(migration_dict)
-
-    def migrate_from_json(self, filename):
-        with open(filename, "r") as f:
-            rows = f.readlines()[0].replace("\n", "")
-            result = json.loads(rows)
-            for dict in result:
-                self.process_migration_dict(dict)
-            return (result)
-
-    def migration_list_to_json(self, filename=None):
-        result = json.dumps(self.migration_list)
-
-        if filename is None or filename == "" or filename.isspace():
-            with open("migrations/migration-" + str(self.migration_number) + ".json", "w+") as f:
-                f.write(result)
-        else:
-            with open(f"migrations/{filename}.json", "w+") as f:
-                f.write(result)
-
-    def create_migrations_from_df(self, name, dataframe):
-
-        columns, return_types = self.extract_columns_and_types_from_df(dataframe)
-
-        migration_dict = {"create": {"table_name": name, "columns": columns, "types": return_types}}
-        self.migration_list.append(migration_dict)
-        self.migration_list_to_json()
-        # return columns, return_types
-
-    def extract_columns_and_types_from_df(self, dataframe):
-        columns = list(dataframe.columns)
-
-        return_types = []
-
-        if columns == []:
-            return ["id"], ["int"]
-
-        for column in dataframe:
-            if dataframe.empty:
-                return_types.append(type(None).__name__)
-                continue
-
-            t = dataframe.loc[0, column]
-            try:
-                if pd.isna(t):
-                    return_types.append(type(None).__name__)
-                else:
-                    try:
-                        return_types.append(type(t.item()).__name__)
-                    except:
-                        return_types.append(type(t).__name__)
-            except:
-                # length = 2**( int(dataframe[col].str.len().max()) - 1).bit_length()
-                length = int(dataframe[column].str.len().max())
-                length += 0.1 * length
-                length = int(math.ceil(length / 10.0)) * 10
-                return_types.append(f'nvarchar({length})' if type(t).__name__ == 'str' else type(t).__name__)
-
-        if (columns[0] != "id"):
-            columns.insert(0, "id")
-            return_types.insert(0, "int")
-
-        return columns, return_types
-